blob: 62f475b0d6cf2807c0762440c732e6c599e2e0ca [file] [log] [blame]
erg@chromium.orgd528f8b2012-05-11 17:31:08 +00001#!/usr/bin/env python
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00002#
erg@google.com26970fa2009-11-17 18:07:32 +00003# Copyright (c) 2009 Google Inc. All rights reserved.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00004#
erg@google.com26970fa2009-11-17 18:07:32 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00008#
erg@google.com26970fa2009-11-17 18:07:32 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000018#
erg@google.com26970fa2009-11-17 18:07:32 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000030
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000031"""Does google-lint on c++ files.
32
33The goal of this script is to identify places in the code that *may*
34be in non-compliance with google style. It does not attempt to fix
35up these problems -- the point is to educate. It also does not
36attempt to find all problems, or to ensure that everything it does
37find is legitimately a problem.
38
39In particular, we can get very confused by /* and // inside strings!
40We do a small hack, which is to ignore //'s with "'s after them on the
41same line, but it is far from perfect (in either direction).
42"""
43
44import codecs
mazda@chromium.org3fffcec2013-06-07 01:04:53 +000045import copy
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000046import getopt
47import math # for log
48import os
49import re
50import sre_compile
51import string
52import sys
53import unicodedata
54
55
56_USAGE = """
57Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +000058 [--counting=total|toplevel|detailed] [--root=subdir]
59 [--linelength=digits]
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000060 <file> [file] ...
61
62 The style guidelines this tries to follow are those in
63 http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
64
65 Every problem is given a confidence score from 1-5, with 5 meaning we are
66 certain of the problem, and 1 meaning it could be a legitimate construct.
67 This will miss some errors, and is not a substitute for a code review.
68
erg@google.com35589e62010-11-17 18:58:16 +000069 To suppress false-positive errors of a certain category, add a
70 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
71 suppresses errors of all categories on that line.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000072
73 The files passed in will be linted; at least one file must be provided.
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +000074 Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the
75 extensions with the --extensions flag.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000076
77 Flags:
78
79 output=vs7
80 By default, the output is formatted to ease emacs parsing. Visual Studio
81 compatible output (vs7) may also be used. Other formats are unsupported.
82
83 verbose=#
84 Specify a number 0-5 to restrict errors to certain verbosity levels.
85
86 filter=-x,+y,...
87 Specify a comma-separated list of category-filters to apply: only
88 error messages whose category names pass the filters will be printed.
89 (Category names are printed with the message and look like
90 "[whitespace/indent]".) Filters are evaluated left to right.
91 "-FOO" and "FOO" means "do not print categories that start with FOO".
92 "+FOO" means "do print categories that start with FOO".
93
94 Examples: --filter=-whitespace,+whitespace/braces
95 --filter=whitespace,runtime/printf,+runtime/printf_format
96 --filter=-,+build/include_what_you_use
97
98 To see a list of all the categories used in cpplint, pass no arg:
99 --filter=
erg@google.com26970fa2009-11-17 18:07:32 +0000100
101 counting=total|toplevel|detailed
102 The total number of errors found is always printed. If
103 'toplevel' is provided, then the count of errors in each of
104 the top-level categories like 'build' and 'whitespace' will
105 also be printed. If 'detailed' is provided, then a count
106 is provided for each category like 'build/class'.
mazda@chromium.org3fffcec2013-06-07 01:04:53 +0000107
108 root=subdir
109 The root directory used for deriving header guard CPP variable.
110 By default, the header guard CPP variable is calculated as the relative
111 path to the directory that contains .git, .hg, or .svn. When this flag
112 is specified, the relative path is calculated from the specified
113 directory. If the specified directory does not exist, this flag is
114 ignored.
115
116 Examples:
avakulenko@google.comd39bbb52014-06-04 22:55:20 +0000117 Assuming that src/.git exists, the header guard CPP variables for
mazda@chromium.org3fffcec2013-06-07 01:04:53 +0000118 src/chrome/browser/ui/browser.h are:
119
120 No flag => CHROME_BROWSER_UI_BROWSER_H_
121 --root=chrome => BROWSER_UI_BROWSER_H_
122 --root=chrome/browser => UI_BROWSER_H_
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +0000123
124 linelength=digits
125 This is the allowed line length for the project. The default value is
126 80 characters.
127
128 Examples:
129 --linelength=120
130
131 extensions=extension,extension,...
132 The allowed file extensions that cpplint will check
133
134 Examples:
135 --extensions=hpp,cpp
avakulenko@google.com17449932014-07-28 22:13:33 +0000136
137 cpplint.py supports per-directory configurations specified in CPPLINT.cfg
138 files. CPPLINT.cfg file can contain a number of key=value pairs.
139 Currently the following options are supported:
140
141 set noparent
142 filter=+filter1,-filter2,...
143 exclude_files=regex
144
145 "set noparent" option prevents cpplint from traversing directory tree
146 upwards looking for more .cfg files in parent directories. This option
147 is usually placed in the top-level project directory.
148
149 The "filter" option is similar in function to --filter flag. It specifies
150 message filters in addition to the |_DEFAULT_FILTERS| and those specified
151 through --filter command-line flag.
152
153 "exclude_files" allows to specify a regular expression to be matched against
154 a file name. If the expression matches, the file is skipped and not run
155 through the linter.
156
157 CPPLINT.cfg has an effect on files in the same directory and all
158 sub-directories, unless overridden by a nested configuration file.
159
160 Example file:
161 filter=-build/include_order,+build/include_alpha
162 exclude_files=.*\.cc
163
164 The above example disables build/include_order warning and enables
165 build/include_alpha as well as excludes all .cc from being
166 processed by linter, in the current directory (where the .cfg
167 file is located) and all sub-directories.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000168"""
169
# Every error message cpplint prints is tagged with one of these categories.
# The list is kept explicit so that `cpplint --filter=` can print all of them.
# When you introduce a message with a new category, register it here too;
# cpplint_unittest.py should tell you if you forget to do this.
_ERROR_CATEGORIES = [
    # build/*
    'build/class',
    'build/c++11',
    'build/deprecated',
    'build/endif_comment',
    'build/explicit_make_pair',
    'build/forward_decl',
    'build/header_guard',
    'build/include',
    'build/include_alpha',
    'build/include_order',
    'build/include_what_you_use',
    'build/namespaces',
    'build/printf_format',
    'build/storage_class',
    # legal/*
    'legal/copyright',
    # readability/*
    'readability/alt_tokens',
    'readability/braces',
    'readability/casting',
    'readability/check',
    'readability/constructors',
    'readability/fn_size',
    'readability/function',
    'readability/multiline_comment',
    'readability/multiline_string',
    'readability/namespace',
    'readability/nolint',
    'readability/nul',
    'readability/streams',
    'readability/todo',
    'readability/utf8',
    # runtime/*
    'runtime/arrays',
    'runtime/casting',
    'runtime/explicit',
    'runtime/int',
    'runtime/init',
    'runtime/invalid_increment',
    'runtime/member_string_references',
    'runtime/memset',
    'runtime/operator',
    'runtime/printf',
    'runtime/printf_format',
    'runtime/references',
    'runtime/string',
    'runtime/threadsafe_fn',
    'runtime/vlog',
    # whitespace/*
    'whitespace/blank_line',
    'whitespace/braces',
    'whitespace/comma',
    'whitespace/comments',
    'whitespace/empty_conditional_body',
    'whitespace/empty_loop_body',
    'whitespace/end_of_line',
    'whitespace/ending_newline',
    'whitespace/forcolon',
    'whitespace/indent',
    'whitespace/line_length',
    'whitespace/newline',
    'whitespace/operators',
    'whitespace/parens',
    'whitespace/semicolon',
    'whitespace/tab',
    'whitespace/todo',
]
erg@google.com6317a9c2009-06-25 00:28:19 +0000238
# Initial state of the category filter; the --filter= flag overrides it.
# Every category is reported by default, so list here only the categories that
# should stay off unless explicitly enabled through --filter=.  As with the
# flag itself, each entry must begin with '-' or '+'.
_DEFAULT_FILTERS = [
    '-build/include_alpha',
]
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000244
245# We used to check for high-bit characters, but after much discussion we
246# decided those were OK, as long as they were in UTF-8 and didn't represent
asvitkine@chromium.org8b8d8be2011-09-08 15:34:45 +0000247# hard-coded international strings, which belong in a separate i18n file.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000248
# Names of C++ headers, in three groups: legacy headers, the C++ library
# headers, and the C++ wrappers for C library facilities.
_CPP_HEADERS = frozenset((
    # Legacy
    'algobase.h', 'algo.h', 'alloc.h', 'builtinbuf.h', 'bvector.h',
    'complex.h', 'defalloc.h', 'deque.h', 'editbuf.h', 'fstream.h',
    'function.h', 'hash_map', 'hash_map.h', 'hash_set', 'hash_set.h',
    'hashtable.h', 'heap.h', 'indstream.h', 'iomanip.h', 'iostream.h',
    'istream.h', 'iterator.h', 'list.h', 'map.h', 'multimap.h',
    'multiset.h', 'ostream.h', 'pair.h', 'parsestream.h', 'pfstream.h',
    'procbuf.h', 'pthread_alloc', 'pthread_alloc.h', 'rope', 'rope.h',
    'ropeimpl.h', 'set.h', 'slist', 'slist.h', 'stack.h',
    'stdiostream.h', 'stl_alloc.h', 'stl_relops.h', 'streambuf.h', 'stream.h',
    'strfile.h', 'strstream.h', 'tempbuf.h', 'tree.h', 'type_traits.h',
    'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm', 'array', 'atomic', 'bitset', 'chrono',
    'codecvt', 'complex', 'condition_variable', 'deque', 'exception',
    'forward_list', 'fstream', 'functional', 'future', 'initializer_list',
    'iomanip', 'ios', 'iosfwd', 'iostream', 'istream',
    'iterator', 'limits', 'list', 'locale', 'map',
    'memory', 'mutex', 'new', 'numeric', 'ostream',
    'queue', 'random', 'ratio', 'regex', 'set',
    'sstream', 'stack', 'stdexcept', 'streambuf', 'string',
    'strstream', 'system_error', 'thread', 'tuple', 'typeindex',
    'typeinfo', 'type_traits', 'unordered_map', 'unordered_set', 'utility',
    'valarray', 'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert', 'ccomplex', 'cctype', 'cerrno', 'cfenv',
    'cfloat', 'cinttypes', 'ciso646', 'climits', 'clocale',
    'cmath', 'csetjmp', 'csignal', 'cstdalign', 'cstdarg',
    'cstdbool', 'cstddef', 'cstdint', 'cstdio', 'cstdlib',
    'cstring', 'ctgmath', 'ctime', 'cuchar', 'cwchar',
    'cwctype',
))
384
avakulenko@google.comd39bbb52014-06-04 22:55:20 +0000385
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE:
# macro name -> {comparison operator -> name of the specialised macro}.
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)

# Macros asserting that the condition holds map the operator directly.
for op, replacement in (('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')):
  for family in ('DCHECK', 'CHECK'):
    _CHECK_REPLACEMENT[family][op] = '%s_%s' % (family, replacement)
  for family in ('EXPECT', 'ASSERT'):
    _CHECK_REPLACEMENT[family + '_TRUE'][op] = (
        '%s_%s' % (family, replacement))
    _CHECK_REPLACEMENT[family + '_TRUE_M'][op] = (
        '%s_%s_M' % (family, replacement))

# Macros asserting that the condition is false map to the inverse operator.
for op, inv_replacement in (('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')):
  for family in ('EXPECT', 'ASSERT'):
    _CHECK_REPLACEMENT[family + '_FALSE'][op] = (
        '%s_%s' % (family, inv_replacement))
    _CHECK_REPLACEMENT[family + '_FALSE_M'][op] = (
        '%s_%s_M' % (family, inv_replacement))
417
# Alternative tokens and the operators they stand for.  For the full list, see
# section 2.5 Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are left out because it's a mess to match those on
# a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!=',
}

# Regular expression matching any of the keywords above.  The leading
# "[ =()]" is meant to avoid matching these keywords outside of boolean
# expressions.
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](%s)(?=[ (]|$)' % '|'.join(_ALT_TOKEN_REPLACEMENT))
444
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000445
# Header classifications, for use with _IncludeState.CheckNextIncludeOrder().
(_C_SYS_HEADER,
 _CPP_SYS_HEADER,
 _LIKELY_MY_HEADER,
 _POSSIBLE_MY_HEADER,
 _OTHER_HEADER) = range(1, 6)
453
# States describing where a line sits relative to an inline assembly block.
(_NO_ASM,      # Outside of inline assembly block
 _INSIDE_ASM,  # Inside inline assembly block
 _END_ASM,     # Last line of inline assembly block
 _BLOCK_ASM,   # The whole block is an inline assembly block
) = range(4)

# Matches the start of an assembly block: an asm keyword, an optional
# volatile qualifier (captured), then an opening brace or parenthesis.
_MATCH_ASM = re.compile(
    r'^\s*(?:asm|_asm|__asm|__asm__)'
    r'(?:\s+(volatile|__volatile__))?'
    r'\s*[{(]')
464
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000465
# Compiled regular expressions, keyed by their pattern string.
_regexp_compile_cache = dict()

# Finds occurrences of NOLINT or NOLINT(...).
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: maps an error category to the set of line numbers on which
# errors of that category are expected and should be suppressed.
_error_suppressions = dict()

# Root directory used for deriving the header guard CPP variable.
# Set by the --root flag.
_root = None

# Allowed line length of files.
# Set by the --linelength flag.
_line_length = 80

# Allowed extensions for file names.
# Set by the --extensions flag.
_valid_extensions = set(('cc', 'h', 'cpp', 'cu', 'cuh'))
486
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppression found on a line, if any.

  Looks for the first NOLINT comment on the line and registers it in the
  global _error_suppressions store.  A bare NOLINT or NOLINT(*) suppresses
  every category on that line; NOLINT(category) suppresses just that
  category.  A category that is not listed in _ERROR_CATEGORIES is reported
  through the error handler and nothing is recorded.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  found = _RE_SUPPRESSION.search(raw_line)
  if not found:
    return

  category = found.group(1)
  if category is None or category == '(*)':
    # The None key holds the lines on which everything is suppressed.
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  # Drop the surrounding parentheses captured by the regexp.
  if category.startswith('(') and category.endswith(')'):
    category = category[1:-1]

  if category not in _ERROR_CATEGORIES:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % category)
    return

  _error_suppressions.setdefault(category, set()).add(linenum)
erg@google.com35589e62010-11-17 18:58:16 +0000514
515
def ResetNolintSuppressions():
  """Empties the global store of NOLINT suppressions.

  The store is cleared in place, so the same dict object stays in use.
  """
  _error_suppressions.clear()
519
520
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether a NOLINT comment suppresses this category on this line.

  Reads the global _error_suppressions map that ParseNolintSuppressions
  fills and ResetNolintSuppressions clears.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the error should be suppressed due to a NOLINT comment.
  """
  # Check the category's own entry first, then the "suppress everything"
  # entry stored under the None key.
  for key in (category, None):
    if linenum in _error_suppressions.get(key, set()):
      return True
  return False
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000535
avakulenko@google.comd39bbb52014-06-04 22:55:20 +0000536
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression.
    s: str, the text to match; matching starts at the beginning of s.

  Returns:
    The match object, or None if the pattern does not match.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  if pattern not in _regexp_compile_cache:
    # re.compile is the public entry point; sre_compile is an internal module
    # that is deprecated in recent Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
545
546
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  if pattern not in _regexp_compile_cache:
    # re.compile is the public entry point; sre_compile is an internal module
    # that is deprecated in recent Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
563
564
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression.
    s: str, the text to scan; the pattern may match anywhere in s.

  Returns:
    The match object for the first match, or None if there is none.
  """
  if pattern not in _regexp_compile_cache:
    # re.compile is the public entry point; sre_compile is an internal module
    # that is deprecated in recent Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
570
571
class _IncludeState(dict):
  """Remembers where headers were included and checks their relative order.

  The instance doubles as a dict that maps an included file name to the line
  number on which that file was included.

  CheckNextIncludeOrder() is meant to be called once for each header in the
  file, with one of the header type constants (_C_SYS_HEADER and friends).
  An include that appears out of order is reported through that method's
  return value: a non-empty error message.
  """
  # self._section only ever advances through these values.  A header that
  # would require stepping back is reported by CheckNextIncludeOrder.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used when building error messages.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
  }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
  }

  def __init__(self):
    super(_IncludeState, self).__init__()
    self.ResetSection()

  def ResetSection(self):
    """Returns to the initial section and forgets the last header seen."""
    # The path of last found header.
    self._last_header = ''
    # The name of the current section.
    self._section = self._INITIAL_SECTION

  def SetLastHeader(self, header_path):
    """Records header_path as the most recently seen header."""
    self._last_header = header_path

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Returns a path canonicalized for alphabetical comparison.

    - turns a trailing-style '-inl.h' into '.h', since '-inl' headers are not
      required to come after the main header.
    - replaces "-" with "_" so they both compare the same.
    - lowercases everything, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    without_inl = header_path.replace('-inl.h', '.h')
    return without_inl.replace('-', '_').lower()

  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    """Check if a header is in alphabetical order with the previous header.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      header_path: Canonicalized header to be checked.

    Returns:
      Returns true if the header is in alphabetical order.
    """
    # When the section changes, _last_header is reset to the empty string,
    # which never sorts after the current header.
    if not self._last_header > header_path:
      return True
    # The header sorts before its predecessor.  That is still accepted when
    # the previous line is blank: assume the headers are intentionally sorted
    # the way they are.
    return Match(r'^\s*$', clean_lines.elided[linenum - 1]) is not None

  def CheckNextIncludeOrder(self, header_type):
    """Returns a non-empty error message if the next header is out of order.

    This function also updates the internal state to be ready to check
    the next include.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.
    """
    # Built up front, from the section we are in before any update.
    message = 'Found %s after %s' % (self._TYPE_NAMES[header_type],
                                     self._SECTION_NAMES[self._section])
    previous_section = self._section

    if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
      if header_type == _C_SYS_HEADER:
        target = self._C_SECTION
      else:
        target = self._CPP_SECTION
      if previous_section > target:
        # A system header after a later section: out of order.
        self._last_header = ''
        return message
      self._section = target
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      if previous_section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        # For a "possible" header this is always the fallback, because we're
        # not sure enough that the header is associated with this file.
        self._section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    # Alphabetical ordering restarts whenever a new section begins.
    if previous_section != self._section:
      self._last_header = ''

    return ''
707
708
709class _CppLintState(object):
710 """Maintains module-wide state.."""
711
712 def __init__(self):
713 self.verbose_level = 1 # global setting.
714 self.error_count = 0 # global count of reported errors
erg@google.com6317a9c2009-06-25 00:28:19 +0000715 # filters to apply when emitting error messages
716 self.filters = _DEFAULT_FILTERS[:]
avakulenko@google.com17449932014-07-28 22:13:33 +0000717 # backup of filter list. Used to restore the state after each file.
718 self._filters_backup = self.filters[:]
erg@google.com26970fa2009-11-17 18:07:32 +0000719 self.counting = 'total' # In what way are we counting errors?
720 self.errors_by_category = {} # string to int dict storing error counts
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000721
722 # output format:
723 # "emacs" - format that emacs can parse (default)
724 # "vs7" - format that Microsoft Visual Studio 7 can parse
725 self.output_format = 'emacs'
726
727 def SetOutputFormat(self, output_format):
728 """Sets the output format for errors."""
729 self.output_format = output_format
730
731 def SetVerboseLevel(self, level):
732 """Sets the module's verbosity, and returns the previous setting."""
733 last_verbose_level = self.verbose_level
734 self.verbose_level = level
735 return last_verbose_level
736
erg@google.com26970fa2009-11-17 18:07:32 +0000737 def SetCountingStyle(self, counting_style):
738 """Sets the module's counting options."""
739 self.counting = counting_style
740
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000741 def SetFilters(self, filters):
742 """Sets the error-message filters.
743
744 These filters are applied when deciding whether to emit a given
745 error message.
746
747 Args:
748 filters: A string of comma-separated filters (eg "+whitespace/indent").
749 Each filter should start with + or -; else we die.
erg@google.com6317a9c2009-06-25 00:28:19 +0000750
751 Raises:
752 ValueError: The comma-separated filters did not all start with '+' or '-'.
753 E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000754 """
erg@google.com6317a9c2009-06-25 00:28:19 +0000755 # Default filters always have less priority than the flag ones.
756 self.filters = _DEFAULT_FILTERS[:]
avakulenko@google.com17449932014-07-28 22:13:33 +0000757 self.AddFilters(filters)
758
759 def AddFilters(self, filters):
760 """ Adds more filters to the existing list of error-message filters. """
erg@google.com6317a9c2009-06-25 00:28:19 +0000761 for filt in filters.split(','):
762 clean_filt = filt.strip()
763 if clean_filt:
764 self.filters.append(clean_filt)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000765 for filt in self.filters:
766 if not (filt.startswith('+') or filt.startswith('-')):
767 raise ValueError('Every filter in --filters must start with + or -'
768 ' (%s does not)' % filt)
769
avakulenko@google.com17449932014-07-28 22:13:33 +0000770 def BackupFilters(self):
771 """ Saves the current filter list to backup storage."""
772 self._filters_backup = self.filters[:]
773
774 def RestoreFilters(self):
775 """ Restores filters previously backed up."""
776 self.filters = self._filters_backup[:]
777
def ResetErrorCounts(self):
  """Clears the per-category error tallies and zeroes the total."""
  self.errors_by_category = {}
  self.error_count = 0
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000782
def IncrementErrorCount(self, category):
  """Records one more error, both in total and (optionally) per category.

  Args:
    category: The error category, possibly hierarchical with '/'
      separators (eg "whitespace/indent").
  """
  self.error_count += 1
  if self.counting not in ('toplevel', 'detailed'):
    # Any other counting mode keeps only the grand total.
    return

  # 'detailed' tallies the full category; 'toplevel' tallies only the part
  # before the first '/'.
  if self.counting == 'detailed':
    bucket = category
  else:
    bucket = category.split('/')[0]
  self.errors_by_category[bucket] = self.errors_by_category.get(bucket, 0) + 1
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000792
def PrintErrorCounts(self):
  """Writes a summary of errors by category, and the total, to stderr."""
  # items() exists on both Python 2 and Python 3 dictionaries, whereas
  # iteritems() is Python 2 only.
  for category, count in self.errors_by_category.items():
    sys.stderr.write('Category \'%s\' errors found: %d\n' %
                     (category, count))
  sys.stderr.write('Total errors found: %d\n' % self.error_count)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000799
# The single module-level _CppLintState instance. The module-level accessor
# functions in this file (_OutputFormat, _SetFilters, ...) read and update it.
_cpplint_state = _CppLintState()
801
802
def _OutputFormat():
  """Returns the output format currently stored in the module state."""
  return _cpplint_state.output_format
806
807
def _SetOutputFormat(output_format):
  """Stores a new output format in the module state.

  Args:
    output_format: The format value, forwarded unchanged to
      _cpplint_state.SetOutputFormat.
  """
  _cpplint_state.SetOutputFormat(output_format)
811
812
def _VerboseLevel():
  """Returns the verbosity level currently stored in the module state."""
  return _cpplint_state.verbose_level
816
817
def _SetVerboseLevel(level):
  """Changes the module's verbosity and hands back the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    Whatever _cpplint_state.SetVerboseLevel returns (the previous level).
  """
  return _cpplint_state.SetVerboseLevel(level)
821
822
def _SetCountingStyle(level):
  """Stores a new error-counting style in the module state.

  Args:
    level: The counting style, forwarded unchanged to
      _cpplint_state.SetCountingStyle.
  """
  _cpplint_state.SetCountingStyle(level)
826
827
def _Filters():
  """Returns the module's current output filters, as a list.

  The list object held by the module state is returned directly, not a copy.
  """
  return _cpplint_state.filters
831
832
def _SetFilters(filters):
  """Replaces the module's error-message filters.

  The filters are consulted when deciding whether to emit a given error
  message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
      Every filter must begin with '+' or '-'; otherwise the underlying
      SetFilters call raises ValueError.
  """
  _cpplint_state.SetFilters(filters)
844
def _AddFilters(filters):
  """Appends filter overrides to the module's current filter list.

  In contrast to _SetFilters, the filters already in effect are kept.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
      Every filter must begin with '+' or '-'; otherwise the underlying
      AddFilters call raises ValueError.
  """
  _cpplint_state.AddFilters(filters)
856
def _BackupFilters():
  """Asks the module state to save a copy of its current filter list."""
  _cpplint_state.BackupFilters()
860
def _RestoreFilters():
  """Asks the module state to reinstate the previously saved filters."""
  _cpplint_state.RestoreFilters()
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000864
class _FunctionState(object):
  """Remembers which function is being scanned and how long its body is."""

  # Line-count thresholds at verbosity 0; Check() doubles them for each
  # additional verbosity level.
  _NORMAL_TRIGGER = 250
  _TEST_TRIGGER = 400  # 60% more than _NORMAL_TRIGGER.

  def __init__(self):
    self.current_function = ''
    self.lines_in_function = 0
    self.in_a_function = False

  def Begin(self, function_name):
    """Starts tracking a new function body with a fresh line count.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Adds one line to the tally, but only while inside a function."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Reports an error when the tracked function body is too long.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    # Functions whose name starts with TEST or Test get the larger allowance.
    is_test = Match(r'T(EST|est)', self.current_function)
    base_trigger = self._TEST_TRIGGER if is_test else self._NORMAL_TRIGGER
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # The confidence grows by one for every doubling of the length relative
    # to base_trigger, and is capped at 5.
    error_level = min(
        5, int(math.log(self.lines_in_function / base_trigger, 2)))
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).' % (
              self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Marks the end of the function body; Count() becomes a no-op."""
    self.in_a_function = False
919
920
class _IncludeError(Exception):
  """Raised to signal a problem with the include order in a file."""
924
925
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Returns the absolute path, with Windows backslashes turned into '/'."""
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root, or the unchanged full
      name when the file does not exist or no checkout root is found.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we walk up the
        # directory tree looking for the top of the SVN checkout.
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        # dirname() of the filesystem root is the root itself, so stop as
        # soon as climbing no longer changes the directory; otherwise a
        # ".svn" entry at the root would make this loop spin forever.
        while (one_up_dir != root_dir and
               os.path.exists(os.path.join(one_up_dir, ".svn"))):
          root_dir = one_up_dir
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
      # searching up from the current path.
      root_dir = os.path.dirname(fullname)
      while (root_dir != os.path.dirname(root_dir) and
             not os.path.exists(os.path.join(root_dir, ".git")) and
             not os.path.exists(os.path.join(root_dir, ".hg")) and
             not os.path.exists(os.path.join(root_dir, ".svn"))):
        root_dir = os.path.dirname(root_dir)

      if (os.path.exists(os.path.join(root_dir, ".git")) or
          os.path.exists(os.path.join(root_dir, ".hg")) or
          os.path.exists(os.path.join(root_dir, ".svn"))):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """
    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative path with the extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """True if the extension is one of c, cc, cpp or cxx."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
1014
1015
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error of this category should be reported.

  An error is dropped for any of three reasons: a "NOLINT(category)" comment
  suppresses it on that line, its confidence is below the verbosity level,
  or the filters exclude its category.

  Args:
    category: The error category string.
    confidence: The confidence score attached to the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False

  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so the last filter whose prefix matches
  # the category decides the outcome.
  suppressed = False
  for rule in _Filters():
    sign, prefix = rule[:1], rule[1:]
    if sign == '-':
      if category.startswith(prefix):
        suppressed = True
    elif sign == '+':
      if category.startswith(prefix):
        suppressed = False
    else:
      assert False  # should have been checked for in SetFilter.

  return not suppressed
1042
1043
def Error(filename, linenum, category, confidence, message):
  """Logs the fact we've found a lint error.

  We log where the error was found, and also our confidence in the error,
  that is, how certain we are this is a legitimate style regression, and
  not a misidentification or a use that's sometimes justified.

  Nothing is counted or written when _ShouldPrintError rejects the error,
  for example because a "NOLINT(category)" comment suppresses it.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)

  # Choose the line layout for the configured output format; every layout
  # takes the same five values in the same order.
  output_format = _cpplint_state.output_format
  if output_format == 'vs7':
    template = '%s(%s): %s [%s] [%d]\n'
  elif output_format == 'eclipse':
    template = '%s:%s: warning: %s [%s] [%d]\n'
  else:
    template = '%s:%s: %s [%s] [%d]\n'
  sys.stderr.write(
      template % (filename, linenum, message, category, confidence))
1077
1078
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches a single C-style comment that opens and closes on the same line.
_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
# Matches a C-style comment together with the whitespace that should vanish
# with it, so that comments inside statements are handled better.  The
# alternatives are tried in this order:
#   1. a comment at the end of the line: whitespace on both sides is removed;
#   2. a comment followed by whitespace: the whitespace on the right is removed;
#   3. a comment preceded by whitespace and followed by a non-word character:
#      the whitespace on the left is removed;
#   4. otherwise only the comment itself is removed.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    '(' + '|'.join([
        r'\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$',
        _RE_PATTERN_C_COMMENTS + r'\s+',
        r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)',
        _RE_PATTERN_C_COMMENTS,
    ]) + ')')
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00001097
1098
def IsCppString(line):
  """Tells whether the text right after `line` lies inside a string constant.

  This function does not consider single-line nor multi-line comments.

  Args:
    line: is a partial line of code starting from the 0..n.

  Returns:
    True, if next character appended to 'line' is inside a
    string constant.
  """
  # Neutralise escaped backslashes first; after this, \\" does not match \".
  line = line.replace(r'\\', 'XX')

  # An odd number of real string delimiters means a string is still open.
  # Escaped quotes (\") and the character literal '"' are not delimiters.
  all_quotes = line.count('"')
  escaped_quotes = line.count(r'\"')
  quote_char_literals = line.count("'\"'")
  return (all_quotes - escaped_quotes - quote_char_literals) % 2 == 1
1114
1115
def CleanseRawStrings(raw_lines):
  """Removes C++11 raw strings from lines.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
      ""
      "";

  Lines that lie entirely inside a multi-line raw string are replaced by an
  empty string literal, so the result has as many lines as the input.

  Args:
    raw_lines: list of raw lines.

  Returns:
    list of lines with C++11 raw strings replaced by empty strings.
  """

  delimiter = None
  lines_without_raw_strings = []
  for line in raw_lines:
    if delimiter:
      # Inside a raw string, look for the end
      end = line.find(delimiter)
      if end >= 0:
        # Found the end of the string, match leading space for this
        # line and resume copying the original lines, and also insert
        # a "" on the last line.
        leading_space = Match(r'^(\s*)\S', line)
        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
        delimiter = None
      else:
        # Haven't found the end yet, replace the line by an empty string.
        line = '""'

    # Look for beginning of a raw string, and replace them with
    # empty strings.  This is done in a loop to handle multiple raw
    # strings on the same line.
    while delimiter is None:
      # Look for beginning of a raw string.
      # See 2.14.15 [lex.string] for syntax.
      #
      # The leading group is non-greedy so that the FIRST opener on the
      # line is handled first.  A greedy match picks the last opener; when
      # that one starts a multi-line raw string the loop stops, and any raw
      # strings earlier on the same line would be left uncollapsed.
      matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if not matched:
        break

      delimiter = ')' + matched.group(2) + '"'
      end = matched.group(3).find(delimiter)
      if end >= 0:
        # Raw string ended on same line
        line = (matched.group(1) + '""' +
                matched.group(3)[end + len(delimiter):])
        delimiter = None
      else:
        # Start of a multi-line raw string
        line = matched.group(1) + '""'

    lines_without_raw_strings.append(line)

  # TODO(unknown): if delimiter is not None here, we might want to
  # emit a warning for unterminated string.
  return lines_without_raw_strings
1180
1181
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the next line that opens a comment continuing past that line.

  Args:
    lines: The list of lines to search.
    lineix: The index at which to start searching.

  Returns:
    The index of the first line at or after lineix that, once stripped,
    starts with '/*' and has no closing '*/' on it, or len(lines) if there
    is none.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # A comment that also closes on this line is not a multi-line comment.
    if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
      return index
  return len(lines)
1191
1192
def FindNextMultiLineCommentEnd(lines, lineix):
  """Finds the line that closes the comment we are currently inside.

  Args:
    lines: The list of lines to search.
    lineix: The index at which to start searching.

  Returns:
    The index of the first line at or after lineix that, once stripped,
    ends with '*/', or len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
1200
1201
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with a placeholder comment.

  Args:
    lines: The list of lines, modified in place.
    begin: Index of the first line to overwrite.
    end: Index just past the last line to overwrite.
  """
  # A '// dummy' comment keeps each line non-empty, so later checks do not
  # raise unnecessary blank line warnings.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
1208
1209
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C-style) comment in lines, in place.

  Args:
    filename: The name of the current file, used when reporting an error.
    lines: The list of lines, modified in place.
    error: The function to call when a comment is never closed.
  """
  cursor = 0
  while cursor < len(lines):
    first = FindNextMultiLineCommentStart(lines, cursor)
    if first >= len(lines):
      # No further multi-line comments.
      return

    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= len(lines):
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return

    RemoveMultiLineCommentsFromRange(lines, first, last + 1)
    cursor = last + 1
1224
1225
def CleanseComments(line):
  """Strips //-comments and single-line C-style /* */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  # Only the first '//' is considered, and it is left alone when
  # IsCppString reports that the text before it ends inside a string.
  slashes = line.find('//')
  if slashes >= 0 and not IsCppString(line[:slashes]):
    line = line[:slashes].rstrip()
  # get rid of /* ... */
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1240
1241
class CleansedLines(object):
  """Holds several copies of all lines with different preprocessing applied.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments,
  3) raw_lines member contains all the lines without processing, and
  4) lines_without_raw_strings member contains the raw lines with C++11 raw
     strings replaced by empty strings.
  All of these members are lists of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    self.lines = []
    self.elided = []
    for text in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(text))
      # Strings are collapsed before comments are removed, so that a '//'
      # inside a string is not taken for a comment.
      self.elided.append(CleanseComments(self._CollapseStrings(text)))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # Lines matching the include pattern are returned untouched.
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Remove escaped characters first to make quote/single quote collapsing
    # basic.  Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    remaining = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)

    # Replace quoted strings and digit separators.  Both single quotes
    # and double quotes are processed in the same loop, otherwise
    # nested quotes wouldn't work.
    pieces = []
    while True:
      # Find the first quote character
      found = Match(r'^([^\'"]*)([\'"])(.*)$', remaining)
      if not found:
        pieces.append(remaining)
        break
      head, quote, tail = found.groups()

      # A single quote right after a numeric prefix is a digit separator,
      # not the start of a char literal.
      #
      # There is no special handling for floating point here, because
      # the integer/fractional/exponent parts would all be parsed
      # correctly as long as there are digits on both sides of the
      # separator.  So we are fine as long as we don't see something
      # like "0.'3" (gcc 4.9.0 will not allow this literal).
      if quote == "'" and Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
        literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
        pieces.append(head + literal.group(1).replace("'", ''))
        remaining = literal.group(2)
        continue

      closing = tail.find(quote)
      if closing < 0:
        # Unmatched quote, don't bother processing the rest of the line;
        # for a double quote this is probably a multiline string.
        pieces.append(remaining)
        break

      # Keep only the pair of quote characters, dropping what is between.
      pieces.append(head + quote * 2)
      remaining = tail[closing + 1:]

    return ''.join(pieces)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00001333
1334
def FindEndOfExpressionInLine(line, startpos, stack):
  """Find the position just after the end of current parenthesized expression.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    stack: nesting stack at startpos.  It is modified in place.

  Returns:
    On finding matching end: (index just after matching end, None)
    On finding an unclosed expression: (-1, None)
    Otherwise: (-1, new stack at end of this line)
  """
  # Maps each closing bracket to the opening bracket it must pair with.
  openers = {')': '(', ']': '[', '}': '{'}

  # range() rather than xrange(): it exists on both Python 2 and Python 3,
  # and a single source line is short enough for the list to be harmless.
  for i in range(startpos, len(line)):
    char = line[i]
    if char in '([{':
      # Found start of parenthesized expression, push to expression stack
      stack.append(char)
    elif char == '<':
      # Found potential start of template argument list
      if i > 0 and line[i - 1] == '<':
        # Left shift operator
        if stack and stack[-1] == '<':
          stack.pop()
          if not stack:
            return (-1, None)
      elif i > 0 and Search(r'\boperator\s*$', line[0:i]):
        # operator<, don't add to stack
        continue
      else:
        # Tentative start of template argument list
        stack.append('<')
    elif char in ')]}':
      # Found end of parenthesized expression.
      #
      # If we are currently expecting a matching '>', the pending '<'
      # must have been an operator.  Remove them from expression stack.
      while stack and stack[-1] == '<':
        stack.pop()
      if not stack:
        return (-1, None)
      if stack[-1] == openers[char]:
        stack.pop()
        if not stack:
          return (i + 1, None)
      else:
        # Mismatched parentheses
        return (-1, None)
    elif char == '>':
      # Found potential end of template argument list.

      # Ignore "->" and operator functions
      # NOTE(review): the text searched here stops one character before
      # line[i - 1], unlike the '<' branch which searches line[0:i] --
      # confirm this is intended.
      if (i > 0 and
          (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))):
        continue

      # Pop the stack if there is a matching '<'.  Otherwise, ignore
      # this '>' since it must be an operator.
      if stack:
        if stack[-1] == '<':
          stack.pop()
          if not stack:
            return (i + 1, None)
    elif char == ';':
      # Found something that look like end of statements.  If we are currently
      # expecting a '>', the matching '<' must have been an operator, since
      # template argument list should not contain statements.
      while stack and stack[-1] == '<':
        stack.pop()
      if not stack:
        return (-1, None)

  # Did not find end of expression or unbalanced parentheses on this line
  return (-1, stack)
mazda@chromium.org3fffcec2013-06-07 01:04:53 +00001411
1412
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [ or <, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  TODO(unknown): cpplint spends a fair bit of time matching parentheses.
  Ideally we would want to index all opening and closing parentheses once
  and have CloseExpression be just a simple lookup, but due to preprocessor
  tricks, this is not so easy.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  opener = line[pos]
  # Give up at once unless pos is on an opening bracket; '<<' and '<=' are
  # operators rather than the start of a template argument list.
  if opener not in '({[<' or Match(r'<[<=]', line[pos:]):
    return (line, clean_lines.NumLines(), -1)

  # Scan the starting line first, then keep going line by line for as long
  # as a nesting stack is carried over and lines remain.
  end_pos, stack = FindEndOfExpressionInLine(line, pos, [])
  while end_pos < 0 and stack and linenum < clean_lines.NumLines() - 1:
    linenum += 1
    line = clean_lines.elided[linenum]
    end_pos, stack = FindEndOfExpressionInLine(line, 0, stack)

  if end_pos >= 0:
    return (line, linenum, end_pos)

  # Did not find end of expression before end of file, give up
  return (line, clean_lines.NumLines(), -1)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00001455
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +00001456
def FindStartOfExpressionInLine(line, endpos, stack):
  """Find position at the matching start of current expression.

  This is almost the reverse of FindEndOfExpressionInLine, but note
  that the input position and returned position differs by 1.

  Args:
    line: a CleansedLines line.
    endpos: start searching at this position.
    stack: nesting stack at endpos.  It is modified in place.

  Returns:
    On finding matching start: (index at matching start, None)
    On finding an unclosed expression: (-1, None)
    Otherwise: (-1, new stack at beginning of this line)
  """
  # Maps each opening bracket to the closing bracket it must pair with.
  closers = {'(': ')', '[': ']', '{': '}'}

  index = endpos
  while index >= 0:
    current = line[index]
    if current in ')]}':
      # Found end of expression, push to expression stack
      stack.append(current)
    elif current == '>':
      # Found potential end of template argument list.
      #
      # Ignore it if it's a "->" or ">=" or "operator>"
      if (index > 0 and
          (line[index - 1] == '-' or
           Match(r'\s>=\s', line[index - 1:]) or
           Search(r'\boperator\s*$', line[0:index]))):
        # Also step over the character in front of the '>'.
        index -= 1
      else:
        stack.append('>')
    elif current == '<':
      # Found potential start of template argument list
      if index > 0 and line[index - 1] == '<':
        # Left shift operator; step over its first '<' as well.
        index -= 1
      elif stack and stack[-1] == '>':
        # There is a matching '>', so pop the expression stack.  Without
        # one this '<' is ignored, since it must be an operator.
        stack.pop()
        if not stack:
          return (index, None)
    elif current in '([{':
      # Found start of expression.
      #
      # If there are any unmatched '>' on the stack, they must be
      # operators.  Remove those.
      while stack and stack[-1] == '>':
        stack.pop()
      if not stack:
        return (-1, None)
      if stack[-1] != closers[current]:
        # Mismatched parentheses
        return (-1, None)
      stack.pop()
      if not stack:
        return (index, None)
    elif current == ';':
      # Found something that look like end of statements.  If we are currently
      # expecting a '<', the matching '>' must have been an operator, since
      # template argument list should not contain statements.
      while stack and stack[-1] == '>':
        stack.pop()
      if not stack:
        return (-1, None)

    index -= 1

  return (-1, stack)
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +00001532
1533
def ReverseCloseExpression(clean_lines, linenum, pos):
  """If input points to ) or } or ] or >, finds the position that opens it.

  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
  linenum/pos that correspond to the opening of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *at* the opening brace, or
    (line, 0, -1) if we never find the matching opening brace.  Note
    we ignore strings and comments when matching; and the line we
    return is the 'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  if line[pos] not in ')}]>':
    return (line, 0, -1)

  # Scan the starting line first, then walk towards the top of the file for
  # as long as a nesting stack is carried over and lines remain.
  start_pos, stack = FindStartOfExpressionInLine(line, pos, [])
  while start_pos < 0 and stack and linenum > 0:
    linenum -= 1
    line = clean_lines.elided[linenum]
    start_pos, stack = FindStartOfExpressionInLine(line, len(line) - 1, stack)

  if start_pos >= 0:
    return (line, linenum, start_pos)

  # Did not find start of expression before beginning of file, give up
  return (line, 0, -1)
1570
1571
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
      Index 0 is a dummy entry, so the scan covers lines[1] up to and
      including lines[10].
    error: The function to call with any errors found.
  """
  # range() rather than xrange(): the latter only exists on Python 2, and
  # the window is at most ten entries long, so nothing is lost on Python 2.
  for linenum in range(1, min(len(lines), 11)):
    if re.search(r'Copyright', lines[linenum], re.I):
      return

  # Means no copyright line was found.
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found. '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
1583
1584
def GetIndentLevel(line):
  """Counts the spaces that precede the first non-whitespace character.

  Args:
    line: A string to check.

  Returns:
    The number of leading space characters, or zero when the run of leading
    spaces is not immediately followed by a non-whitespace character (for
    example on a blank line).
  """
  leading = Match(r'^( *)\S', line)
  return len(leading.group(1)) if leading else 0
1599
1600
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  The name is derived from the file's repository-relative path: an optional
  leading "<_root><os.sep>" is removed, every '-', '.', '/' and whitespace
  character becomes '_', the result is upper-cased and a trailing '_' is
  appended.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.

  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  fileinfo = FileInfo(filename)
  file_path_from_root = fileinfo.RepositoryName()
  if _root:
    # Strip the prefix as literal text.  Building a regex from _root would
    # treat characters such as '.' or '+' as metacharacters, and a backslash
    # os.sep would leave a dangling escape that makes re raise an error.
    prefix = _root + os.sep
    if file_path_from_root.startswith(prefix):
      file_path_from_root = file_path_from_root[len(prefix):]
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00001623
1624
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file is wrapped in a correctly named header guard.

  The first #ifndef, the first #define and the last #endif in the file are
  compared against the name produced by GetHeaderGuardCPPVariable().  A
  missing #ifndef or #define, or a mismatch between the two, is reported
  and ends the check.  A guard name or #endif comment that differs from
  the expected one is reported as a style problem.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  cppvar = GetHeaderGuardCPPVariable(filename)

  # Single pass over the file: remember the first #ifndef and #define
  # arguments and the last line that starts with #endif.
  ifndef = define = endif = None
  ifndef_linenum = endif_linenum = 0
  for linenum, line in enumerate(lines):
    words = line.split()
    if len(words) >= 2:
      directive, argument = words[0], words[1]
      if directive == '#ifndef' and not ifndef:
        ifndef, ifndef_linenum = argument, linenum
      if directive == '#define' and not define:
        define = argument
    if line.startswith('#endif'):
      endif, endif_linenum = line, linenum

  if not ifndef:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not define:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility (reported at level 0 instead of 5).
  legacy_cppvar = cppvar + '_'
  if ifndef != cppvar:
    error_level = 0 if ifndef == legacy_cppvar else 5

    # Presumably lets a NOLINT marker on the #ifndef line suppress the
    # report below -- confirm against ParseNolintSuppressions.
    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if define != ifndef:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          cppvar)
    return

  # NOTE(review): the expected text has a single space before '//'; confirm
  # this matches the comment spacing the rest of the linter enforces.
  if endif != ('#endif // %s' % cppvar):
    error_level = 0 if endif == ('#endif // %s' % legacy_cppvar) else 5

    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
1698
1699
def CheckForBadCharacters(filename, lines, error):
  """Logs an error for each line containing bad characters.

  Two kinds of bad characters are reported:

  1. Unicode replacement characters: These indicate that either the file
  contained invalid UTF-8 (likely) or Unicode replacement characters (which
  it shouldn't).  Note that it's possible for this to throw off line
  numbering if the invalid UTF-8 occurred adjacent to a newline.

  2. NUL bytes.  These are problematic for some tools.

  A line containing both kinds is reported twice, replacement character
  first.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # (offending character, error category, message), in reporting order.
  bad_characters = (
      (u'\ufffd', 'readability/utf8',
       'Line contains invalid UTF-8 (or Unicode replacement character).'),
      ('\0', 'readability/nul', 'Line contains NUL byte.'),
  )
  for linenum, line in enumerate(lines):
    for character, category, message in bad_characters:
      if character in line:
        error(filename, linenum, category, 5, message)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00001723
1724
def CheckForNewlineAtEOF(filename, lines, error):
  """Logs an error if there is no newline char at the end of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # A file that ends in \n therefore yields at least three entries, with
  # an empty string in the last-but-one slot.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return

  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
1741
1742
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Logs an error if we see /* ... */ or "..." that extend past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line.  Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary.  We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Drop every \\ (escaped backslash) up front.  They are harmless, but the
  # second backslash of such a pair would otherwise be mistaken for the
  # start of an escaped quote (\") below.
  text = clean_lines.elided[linenum].replace('\\\\', '')

  # More comment openers than closers: a comment runs past this line.
  comment_opens = text.count('/*')
  comment_closes = text.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped quotes: a string runs past this line.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. '
          'Use C++11 raw strings or concatenation instead.')
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00001779
1780
# Each _THREADING_LIST entry is a tuple of
# (non-threadsafe name, thread-safe alternative, validation pattern).
#
# The validation pattern is used to eliminate false positives such as:
#  _rand();               // false positive due to substring match.
#  ->rand();              // some member function rand().
#  ACMRandom rand(seed);  // some variable named rand.
#  ISAACRandom rand();    // another variable named rand.
#
# Basically we require the return value of these functions to be used
# in some expression context on the same line by matching on some
# operator before the function name.  This eliminates constructors and
# member function calls.
_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'

# The table is generated from (function name, regex for its argument list)
# pairs: functions taking arguments use r'\([^)]+\)', the two that take none
# use r'\(\)'.
_THREADING_LIST = tuple(
    (name + '(', name + '_r(', _UNSAFE_FUNC_PREFIX + name + arguments)
    for name, arguments in (
        ('asctime', r'\([^)]+\)'),
        ('ctime', r'\([^)]+\)'),
        ('getgrgid', r'\([^)]+\)'),
        ('getgrnam', r'\([^)]+\)'),
        ('getlogin', r'\(\)'),
        ('getpwnam', r'\([^)]+\)'),
        ('getpwuid', r'\([^)]+\)'),
        ('gmtime', r'\([^)]+\)'),
        ('localtime', r'\([^)]+\)'),
        ('rand', r'\(\)'),
        ('strtok', r'\([^)]+\)'),
        ('ttyname', r'\([^)]+\)'),
    ))
1809
1810
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading.  Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added.  These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]
  for unsafe_name, safe_name, usage_pattern in _THREADING_LIST:
    # Only the validation pattern decides whether this line really calls
    # the function we are looking for.
    if not Search(usage_pattern, text):
      continue
    error(filename, linenum, 'runtime/threadsafe_fn', 2,
          'Consider using ' + safe_name +
          '...) instead of ' + unsafe_name +
          '...) for improved thread safety.')
1835
1836
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Checks that VLOG() is only used for defining a logging level.

  VLOG(2) is fine; VLOG(INFO), VLOG(ERROR), VLOG(WARNING), VLOG(DFATAL) and
  VLOG(FATAL) are reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]
  if not Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', text):
    return
  error(filename, linenum, 'runtime/vlog', 5,
        'VLOG() should be used with numeric verbosity level. '
        'Use LOG() if you want symbolic severity levels.')
1854
# Matches a statement consisting solely of "*name++;" or "*name--;".  Such a
# statement moves the pointer instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(r'^\s*\*\w+(\+\+|--);')
1859
1860
def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
    void increment_counter(int* count) {
      *count++;
    }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if not _RE_PATTERN_INVALID_INCREMENT.match(statement):
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1881
1882
class _BlockInfo(object):
  """Stores information about a generic block of code.

  Subclasses refine CheckBegin() and CheckEnd(); on this base class both
  are no-ops.
  """

  def __init__(self, seen_open_brace):
    # Whether the '{' that opens this block has been seen yet.
    self.seen_open_brace = seen_open_brace
    # Running count of '(' minus ')' inside this block.
    self.open_parentheses = 0
    # Inline assembly state; starts out as "not in assembly".
    self.inline_asm = _NO_ASM

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Run checks that applies to text up to the opening brace.

    This is mostly for checking the text after the class identifier
    and the "{", usually where the base class is specified.  For other
    blocks, there isn't much to check, so we always pass.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Run checks that applies to text after the closing brace.

    This is mostly used for checking end of namespace comments.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def IsBlockInfo(self):
    """Returns true if this block is a _BlockInfo.

    This is convenient for verifying that an object is an instance of
    a _BlockInfo, but not an instance of any of the derived classes.

    Returns:
      True for this class, False for derived classes.
    """
    return type(self) is _BlockInfo
1929
1930
class _ExternCInfo(_BlockInfo):
  """Stores information about an 'extern "C"' block."""

  def __init__(self):
    # Created with seen_open_brace already set to True.
    super(_ExternCInfo, self).__init__(True)
1936
mazda@chromium.org3fffcec2013-06-07 01:04:53 +00001937
class _ClassInfo(_BlockInfo):
  """Stores information about a class or struct."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    """Records where the class starts and estimates where it ends.

    Args:
      name: The name of the class.
      class_or_struct: The keyword used; 'struct' selects public default
        access, anything else selects private.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line the class declaration starts on.
    """
    _BlockInfo.__init__(self, False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False
    self.is_struct = (class_or_struct == 'struct')
    self.access = 'public' if self.is_struct else 'private'

    # Remember initial indentation level for this class.  Using raw_lines here
    # instead of elided to account for leading comments.
    self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])

    # Try to find the end of the class by balancing braces from the
    # declaration onward.  This will be confused by things like:
    #   class A {
    #   } *x = { ...
    #
    # But it's still good enough for CheckSectionSpacing.
    self.last_line = 0
    brace_depth = 0
    for candidate in range(linenum, clean_lines.NumLines()):
      text = clean_lines.elided[candidate]
      brace_depth += text.count('{') - text.count('}')
      if brace_depth == 0:
        self.last_line = candidate
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Marks the class as derived if the line contains a bare ':'."""
    # A ':' that is not part of '::'.
    has_bare_colon = Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum])
    if has_bare_colon:
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks that the closing brace lines up with the class declaration."""
    # Only do this if the closing brace is indented by only whitespaces.
    # This means we will not check single-line class definitions.
    brace_indent = Match(r'^( *)\}', clean_lines.elided[linenum])
    if not brace_indent:
      return
    if len(brace_indent.group(1)) == self.class_indent:
      return

    keyword = 'struct ' if self.is_struct else 'class '
    parent = keyword + self.name
    error(filename, linenum, 'whitespace/indent', 3,
          'Closing brace should be aligned with beginning of %s' % parent)
1988
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00001989
class _NamespaceInfo(_BlockInfo):
  """Stores information about a namespace."""

  def __init__(self, name, linenum):
    """Records the namespace name ('' when anonymous) and its first line."""
    _BlockInfo.__init__(self, False)
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line holding the closing brace.
      error: The function to call with any errors found.
    """
    line = clean_lines.raw_lines[linenum]

    # Check how many lines is enclosed in this namespace.  Don't issue
    # warning for missing namespace comments if there aren't enough
    # lines.  However, do apply checks if there is already an end of
    # namespace comment and it's incorrect.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    is_short = linenum - self.starting_linenum < 10
    if is_short and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line):
      return

    # Look for matching comment at end of namespace.
    #
    # Note that we accept C style "/* */" comments for terminating
    # namespaces, so that code that terminate namespaces inside
    # preprocessor macros can be cpplint clean.
    #
    # We also accept stuff like "// end of namespace <name>." with the
    # period at the end.
    #
    # Besides these, we don't accept anything else, otherwise we might
    # get false negatives when existing comment is a substring of the
    # expected namespace.
    if self.name:
      # Named namespace
      expected_comment = (r'};*\s*(//|/\*).*\bnamespace\s+' +
                          re.escape(self.name) +
                          r'[\*/\.\\\s]*$')
      if not Match(expected_comment, line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace %s"' %
              self.name)
      return

    # Anonymous namespace
    if Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
      return

    message = 'Anonymous namespace should be terminated with "// namespace"'
    # If "// namespace anonymous" or "// anonymous namespace (more text)",
    # mention "// anonymous namespace" as an acceptable form
    if Match(r'}.*\b(namespace anonymous|anonymous namespace)\b', line):
      message += ' or "// anonymous namespace"'
    error(filename, linenum, 'readability/namespace', 5, message)
mazda@chromium.org3fffcec2013-06-07 01:04:53 +00002049
2050
class _PreprocessorInfo(object):
  """Stores checkpoints of nesting stacks when #if/#else is seen."""

  def __init__(self, stack_before_if):
    """Creates the checkpoint taken at an #if.

    Args:
      stack_before_if: The entire nesting stack as it was before the #if.
    """
    # Snapshot of the nesting stack from just before the #if.
    self.stack_before_if = stack_before_if

    # Snapshot of the nesting stack from just before the #else; stays
    # empty until one is recorded.
    self.stack_before_else = []

    # Set once an #else or #elif has been seen for this #if.
    self.seen_else = False
2063
2064
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00002065class NestingState(object):
mazda@chromium.org3fffcec2013-06-07 01:04:53 +00002066 """Holds states related to parsing braces."""
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00002067
2068 def __init__(self):
mazda@chromium.org3fffcec2013-06-07 01:04:53 +00002069 # Stack for tracking all braces. An object is pushed whenever we
2070 # see a "{", and popped when we see a "}". Only 3 types of
2071 # objects are possible:
2072 # - _ClassInfo: a class or struct.
2073 # - _NamespaceInfo: a namespace.
2074 # - _BlockInfo: some other type of block.
2075 self.stack = []
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00002076
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00002077 # Top of the previous stack before each Update().
2078 #
2079 # Because the nesting_stack is updated at the end of each line, we
2080 # had to do some convoluted checks to find out what is the current
2081 # scope at the beginning of the line. This check is simplified by
2082 # saving the previous top of nesting stack.
2083 #
2084 # We could save the full stack, but we only need the top. Copying
2085 # the full nesting stack would slow down cpplint by ~10%.
2086 self.previous_stack_top = []
2087
mazda@chromium.org3fffcec2013-06-07 01:04:53 +00002088 # Stack of _PreprocessorInfo objects.
2089 self.pp_stack = []
2090
2091 def SeenOpenBrace(self):
2092 """Check if we have seen the opening brace for the innermost block.
2093
2094 Returns:
2095 True if we have seen the opening brace, False if the innermost
2096 block is still expecting an opening brace.
2097 """
2098 return (not self.stack) or self.stack[-1].seen_open_brace
2099
2100 def InNamespaceBody(self):
2101 """Check if we are currently one level inside a namespace body.
2102
2103 Returns:
2104 True if top of the stack is a namespace block, False otherwise.
2105 """
2106 return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
2107
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00002108 def InExternC(self):
2109 """Check if we are currently one level inside an 'extern "C"' block.
2110
2111 Returns:
2112 True if top of the stack is an extern block, False otherwise.
2113 """
2114 return self.stack and isinstance(self.stack[-1], _ExternCInfo)
2115
2116 def InClassDeclaration(self):
2117 """Check if we are currently one level inside a class or struct declaration.
2118
2119 Returns:
2120 True if top of the stack is a class/struct, False otherwise.
2121 """
2122 return self.stack and isinstance(self.stack[-1], _ClassInfo)
2123
2124 def InAsmBlock(self):
2125 """Check if we are currently one level inside an inline ASM block.
2126
2127 Returns:
2128 True if the top of the stack is a block containing inline ASM.
2129 """
2130 return self.stack and self.stack[-1].inline_asm != _NO_ASM
2131
def InTemplateArgumentList(self, clean_lines, linenum, pos):
  """Check if current position is inside template argument list.

  Scans forward from (linenum, pos) over the elided lines and decides based
  on the first character that can settle the question.  A '<' met along the
  way is handed to CloseExpression(); if a matching '>' exists the scan
  resumes from the position CloseExpression() reports.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: position just after the suspected template argument.
  Returns:
    True if (linenum, pos) is inside template arguments.  False if '{', '}'
    or ';' is met first, if a '<' has no matching '>', or if the end of the
    file is reached without a decision.
  """
  while linenum < clean_lines.NumLines():
    # Find the earliest character that might indicate a template argument
    #
    # The pattern skips characters that cannot decide anything; group(1) is
    # the first deciding character, or -- when none is left on the line --
    # simply the last character of the line.
    line = clean_lines.elided[linenum]
    match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
    if not match:
      # Nothing left to look at on this line; continue on the next one.
      linenum += 1
      pos = 0
      continue
    token = match.group(1)
    # pos now points just past the token.
    pos += len(match.group(0))

    # These things do not look like template argument list:
    #   class Suspect {
    #   class Suspect x; }
    if token in ('{', '}', ';'): return False

    # These things look like template argument list:
    #   template <class Suspect>
    #   template <class Suspect = default_value>
    #   template <class Suspect[]>
    #   template <class Suspect...>
    if token in ('>', '=', '[', ']', '.'): return True

    # Check if token is an unmatched '<'.
    # If not, move on to the next character.
    if token != '<':
      pos += 1
      if pos >= len(line):
        linenum += 1
        pos = 0
      continue

    # We can't be sure if we just find a single '<', and need to
    # find the matching '>'.  (pos - 1 is the index of that '<'.)
    (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
    if end_pos < 0:
      # Not sure if template argument list or syntax error in file
      return False
    linenum = end_line
    pos = end_pos
  return False
2183
def UpdatePreprocessor(self, line):
  """Update preprocessor stack.

  We need to handle preprocessors due to classes like this:
    #ifdef SWIG
    struct ResultDetailsPageElementExtensionPoint {
    #else
    struct ResultDetailsPageElementExtensionPoint : public Extension {
    #endif

  We make the following assumptions (good enough for most files):
  - Preprocessor condition evaluates to true from #if up to first
    #else/#elif/#endif.

  - Preprocessor condition evaluates to false from #else/#elif up
    to #endif.  We still perform lint checks on these lines, but
    these do not affect nesting stack.

  Args:
    line: current line to check.
  """
  if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
    # Beginning of #if block, save the nesting stack here.  The saved
    # stack will allow us to restore the parsing state in the #else case.
    checkpoint = _PreprocessorInfo(copy.deepcopy(self.stack))
    self.pp_stack.append(checkpoint)
    return

  if Match(r'^\s*#\s*(else|elif)\b', line):
    # Beginning of #else block
    if not self.pp_stack:
      # TODO(unknown): unexpected #else, issue warning?
      return

    innermost = self.pp_stack[-1]
    if not innermost.seen_else:
      # This is the first #else or #elif block.  Remember the
      # whole nesting stack up to this point.  This is what we
      # keep after the #endif.
      innermost.seen_else = True
      innermost.stack_before_else = copy.deepcopy(self.stack)

    # Restore the stack to how it was before the #if
    self.stack = copy.deepcopy(innermost.stack_before_if)
    return

  if Match(r'^\s*#\s*endif\b', line):
    # End of #if or #else blocks.
    if not self.pp_stack:
      # TODO(unknown): unexpected #endif, issue warning?
      return

    # Drop the corresponding #if
    finished = self.pp_stack.pop()

    # If we saw an #else, we will need to restore the nesting
    # stack to its former state before the #else, otherwise we
    # will just continue from where we left off.
    if finished.seen_else:
      # No copy needed: the checkpoint was just popped, so this is the
      # last reference to its saved stack.
      self.stack = finished.stack_before_else
2239
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00002240 # TODO(unknown): Update() is too long, but we will refactor later.
def Update(self, filename, clean_lines, linenum, error):
  """Advance the nesting state by one line of input.

  The line is consumed left to right: namespace heads first, then at most
  one class/struct head, then access specifiers, and finally every brace,
  semicolon and closing parenthesis that is left.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # Remember which block was innermost before this line is processed.  A
  # plain reference is kept on purpose; the original author measured a
  # ~28% slowdown of cpplint when copy.deepcopy was used here instead.
  self.previous_stack_top = self.stack[-1] if self.stack else None

  # Let the preprocessor bookkeeping (pp_stack) see the line first.
  self.UpdatePreprocessor(text)

  # Track the parenthesis balance of the innermost block so that struct
  # arguments are not mistaken for new nesting levels.
  if self.stack:
    top = self.stack[-1]
    paren_delta = text.count('(') - text.count(')')
    top.open_parentheses += paren_delta

    # The same balance drives the inline assembly state machine.
    asm_state = top.inline_asm
    if asm_state in (_NO_ASM, _END_ASM):
      entering_asm = (paren_delta != 0 and
                      top.open_parentheses == 1 and
                      _MATCH_ASM.match(text))
      # When no assembly block starts here, a previous _END_ASM decays to
      # _NO_ASM.
      top.inline_asm = _INSIDE_ASM if entering_asm else _NO_ASM
    elif asm_state == _INSIDE_ASM and top.open_parentheses == 0:
      top.inline_asm = _END_ASM

  # Peel namespace heads off the front of the line, repeatedly, so that
  # one-liners such as
  #   namespace proto2 { namespace bridge { class MessageSet; } }
  # are handled.  The "\b\s*" accepts a namespace keyword that is not
  # followed by whitespace; the missing space is reported by CheckSpacing.
  namespace_pattern = r'^\s*namespace\b\s*([:\w]+)?(.*)$'
  namespace_match = Match(namespace_pattern, text)
  while namespace_match:
    namespace_info = _NamespaceInfo(namespace_match.group(1), linenum)
    self.stack.append(namespace_info)

    text = namespace_match.group(2)
    if '{' in text:
      namespace_info.seen_open_brace = True
      text = text.partition('{')[2]
    namespace_match = Match(namespace_pattern, text)

  # Look for a class/struct head in the remainder.  The pattern tolerates
  # decorated declarations such as:
  #   class LOCKABLE API Object {
  #   };
  class_match = Match(
      r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?'
      r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
      r'(.*)$', text)
  if class_match and (not self.stack or
                      self.stack[-1].open_parentheses == 0):
    # "class" may also introduce a template parameter:
    #   template <class Ignore1,
    #             class Ignore2 = Default<Args>,
    #             template <Args> class Ignore3>
    #   void Function() {};
    # InTemplateArgumentList scans forward from the end of the head to
    # recognise that case, and such matches are not pushed.
    head_length = len(class_match.group(1))
    if not self.InTemplateArgumentList(clean_lines, linenum, head_length):
      self.stack.append(_ClassInfo(
          class_match.group(3), class_match.group(2),
          clean_lines, linenum))
      text = class_match.group(4)

  # While the innermost block is still waiting for its opening brace, run
  # its begin-of-block checks.
  if not self.SeenOpenBrace():
    self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)

  # Inside a class/struct, follow public/private/protected/signals labels.
  if self.stack and isinstance(self.stack[-1], _ClassInfo):
    current_class = self.stack[-1]
    label_match = Match(
        r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
        r':(?:[^:]|$)',
        text)
    if label_match:
      current_class.access = label_match.group(2)

      # The label belongs one space to the right of the class keyword.
      # Only complain when nothing but whitespace precedes the label.
      prefix = label_match.group(1)
      if (len(prefix) != current_class.class_indent + 1 and
          Match(r'^\s*$', prefix)):
        keyword = 'struct ' if current_class.is_struct else 'class '
        parent = keyword + current_class.name
        slots = label_match.group(3) or ''
        error(filename, linenum, 'whitespace/indent', 3,
              '%s%s: should be indented +1 space inside %s' % (
                  label_match.group(2), slots, parent))

  # Consume, one at a time, the first brace, semicolon or closing
  # parenthesis that is left on the line.
  token_pattern = r'^[^{;)}]*([{;)}])(.*)$'
  token_match = Match(token_pattern, text)
  while token_match:
    token = token_match.group(1)
    if token == '{':
      if not self.SeenOpenBrace():
        # This brace completes the pending namespace/class head.
        self.stack[-1].seen_open_brace = True
      elif Match(r'^extern\s*"[^"]*"\s*\{', text):
        self.stack.append(_ExternCInfo())
      else:
        opened_block = _BlockInfo(True)
        self.stack.append(opened_block)
        if _MATCH_ASM.match(text):
          opened_block.inline_asm = _BLOCK_ASM
    elif token in (';', ')'):
      # A semicolon before any opening brace is probably a forward
      # declaration; a closing parenthesis is probably a function argument
      # spelled with an extra "class"/"struct" keyword.  Neither opens a
      # block, so the pending entry is dropped.
      if not self.SeenOpenBrace():
        self.stack.pop()
    elif self.stack:  # token == '}'
      # Run the end-of-block checks, then leave the block.
      self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
      self.stack.pop()
    text = token_match.group(2)
    token_match = Match(token_pattern, text)
2403 line = matched.group(2)
2404
def InnermostClass(self):
  """Find the closest enclosing class on the nesting stack.

  Returns:
    The _ClassInfo nearest to the top of the stack, or None when no entry
    of the stack is a _ClassInfo.
  """
  # Walk from the innermost block outwards.
  for block in reversed(self.stack):
    if isinstance(block, _ClassInfo):
      return block
  return None
2416
def CheckCompletedBlocks(self, filename, error):
  """Report classes and namespaces that are still open at end of file.

  Call this when all lines in a file have been processed.

  Args:
    filename: The name of the current file.
    error: The function to call with any errors found.
  """
  # Note: #ifdef constructs that interfere with brace matching can lead to
  # false positives here.  See the testBuildClass test in
  # cpplint_unittest.py for an example.
  for block in self.stack:
    if isinstance(block, _ClassInfo):
      category = 'build/class'
      template = 'Failed to find complete declaration of class %s'
    elif isinstance(block, _NamespaceInfo):
      category = 'build/namespaces'
      template = 'Failed to find complete declaration of namespace %s'
    else:
      # Other kinds of blocks are not reported.
      continue
    error(filename, block.starting_linenum, category, 5,
          template % block.name)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00002437
2438
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  r"""Logs an error for constructs gcc-2 accepted that are not standard C++.

  The following are reported:
  - a type or qualifier placed before the storage class
    (e.g. "const static" instead of "static const").
  - "%q" conversions in printf-type format strings.
  - "%N$" positional conversions in printf-type format strings.
  - the undefined escapes "\%", "\[", "\(" and "\{" in quoted text.
  - uncommented text after #endif.
  - inner-style forward declarations ("class Outer::Inner;").
  - the >? and <? operators, and their >?= and <?= cousins.

  Because it is convenient to do so here, "const string&" members and
  single-argument constructors that are not marked explicit are reported
  as well.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 4 arguments:
           filename, line number, error level, and message
  """
  # First pass: comments removed, string literals still present.
  with_strings = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', with_strings):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', with_strings):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # An escaped backslash must not be read as the start of another escape,
  # so those are dropped before looking for undefined escapes.
  unescaped = with_strings.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', unescaped):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # Second pass: both comments and strings removed.
  code = clean_lines.elided[linenum]

  misplaced_storage_class = (
      r'\b(const|volatile|void|char|short|int|long'
      r'|float|double|signed|unsigned'
      r'|schar|u?int8|u?int16|u?int32|u?int64)'
      r'\s+(register|static|extern|typedef)\b')
  if Search(misplaced_storage_class, code):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', code):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard. Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', code):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid. Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            code):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', code):
    # TODO(unknown): Could it be expanded safely to arbitrary references,
    # without triggering too many false positives? The first
    # attempt triggered 5 warnings for mostly benign code in the regtest, hence
    # the restriction.
    # Here's the original regexp, for the reference:
    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # Everything below applies only inside a class whose head is complete.
  enclosing_class = nesting_state.InnermostClass()
  if not enclosing_class or not enclosing_class.seen_open_brace:
    return

  # The class may have been declared with namespace or classname
  # qualifiers; its constructors are written without them.
  escaped_name = re.escape(enclosing_class.name.split('::')[-1])

  # A constructor taking exactly one argument should be marked explicit.
  # Technically a valid construct, but against style.
  constructor = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)' % escaped_name,
                      code)
  if not constructor:
    return
  sole_argument = constructor.group(1)
  if sole_argument == 'void':
    return
  if Search(r'\bstd::initializer_list\b', sole_argument):
    return
  # An argument that is a reference to the class itself is exempt as well.
  if Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&' % escaped_name,
           sole_argument.strip()):
    return
  error(filename, linenum, 'runtime/explicit', 5,
        'Single-argument constructors should be marked explicit.')
2545
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00002546
def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error):
  """Checks the spacing around the parentheses of function calls.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Function calls often sit inside if/for/while/switch expressions, which
  # follow their own, more liberal conventions.  When the line holds such a
  # construct, only the text inside its parentheses is examined, with the
  # stricter function-call rules; otherwise the whole line is examined.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  control_matches = (Search(pattern, line) for pattern in control_patterns)
  fncall = next((found.group(1) for found in control_matches if found), line)

  # Except in if/for/while/switch, there should never be space immediately
  # inside parens (eg "f( 3, 4 )"), with an exception for nested parens
  # ( (a+b) + c ).  Likewise, there should never be a space before a ( when
  # it starts a function's arguments, which is assumed when the char before
  # the whitespace is legal in a function name (alnum + _) and no macro is
  # being started.  Pointers and references to arrays and functions are too
  # tricky and are recognized very simply as
  #   " (something)(maybe-something)" or
  #   " (something)(maybe-something," or
  #   " (something)[something]"
  # on the assumption that the contents of [] never need to wrap.
  exempt = (
      # Control structures and keyword expressions.
      Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
             fncall) or
      # Pointers/references to functions.
      Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) or
      # Pointers/references to arrays.
      Search(r' \([^)]+\)\[[^\]]+\]', fncall))
  if exempt:
    return

  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):  # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  space_before_paren = (
      Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and
      not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall))
  if space_before_paren:
    # TODO(unknown): Space after an operator function seem to be a common
    # error, silence those for now by restricting them to highest verbosity.
    confidence = 0 if Search(r'\boperator_*\b', line) else 4
    error(filename, linenum, 'whitespace/parens', confidence,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain.
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    # A closing parenthesis preceded by nothing but whitespace gets a more
    # descriptive message.
    if Search(r'^\s+\)', fncall):
      message = 'Closing ) should be moved to the previous line'
    else:
      message = 'Extra space before )'
    error(filename, linenum, 'whitespace/parens', 2, message)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00002620
2621
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty (or otherwise falsy) or when it
  is made up of whitespace characters only.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
2635
2636
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if (function_name in ('TEST', 'TEST_F') or
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # range() rather than the Python-2-only xrange(): it yields the same
    # indices and keeps this function usable on Python 3 as well.
    for start_linenum in range(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      # Accumulate the head so that the parameters of TEST... macros can be
      # recovered even when they span several lines.
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break  # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):  # Handle TEST... macros
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:  # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
2703
2704
# Matches a "//" comment whose text starts with TODO.
#   group 1: the whitespace between "//" and "TODO" (possibly empty).
#   group 2: the optional parenthesized part, e.g. "(my_username)".
#   group 3: the optional single whitespace character, or the empty match
#            at the end of the string, after the optional colon.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
2706
2707
def CheckComment(line, filename, linenum, next_line_start, error):
  """Checks a line's // comment for common spacing and TODO mistakes.

  Args:
    line: The line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    next_line_start: The first non-whitespace column of the next line.
    error: The function to call with any errors found.
  """
  commentpos = line.find('//')
  if commentpos == -1:
    return

  # An odd number of unescaped double quotes before the // suggests that it
  # sits inside a string literal, in which case it is ignored.
  quotes = line.count('"', 0, commentpos)
  escaped_quotes = line.count('\\"', 0, commentpos)
  if (quotes - escaped_quotes) % 2 != 0:
    return

  # Allow one space for new scopes, two spaces otherwise.
  opens_scope = (Match(r'^.*{ *//', line) and
                 next_line_start == commentpos)
  if not opens_scope:
    touches_code = (commentpos >= 1 and
                    line[commentpos-1] not in string.whitespace)
    nearly_touches_code = (commentpos >= 2 and
                           line[commentpos-2] not in string.whitespace)
    if touches_code or nearly_touches_code:
      error(filename, linenum, 'whitespace/comments', 2,
            'At least two spaces is best between code and comments')

  # Checks for common mistakes in TODO comments.
  comment = line[commentpos:]
  todo = _RE_PATTERN_TODO.match(comment)
  if todo:
    # One whitespace is correct; zero whitespace is handled elsewhere.
    if len(todo.group(1)) > 1:
      error(filename, linenum, 'whitespace/todo', 2,
            'Too many spaces before TODO')

    if not todo.group(2):
      error(filename, linenum, 'readability/todo', 2,
            'Missing username in TODO; it should look like '
            '"// TODO(my_username): Stuff."')

    # Only a single space or the end of the comment is accepted here; any
    # other whitespace character, or no match at all, is reported.
    if todo.group(3) not in (' ', ''):
      error(filename, linenum, 'whitespace/todo', 2,
            'TODO(my_username) should be followed by a space')

  # If the comment contains an alphanumeric character, there
  # should be a space somewhere between it and the //.
  if Match(r'//[^ ]*\w', comment):
    error(filename, linenum, 'whitespace/comments', 4,
          'Should have a space between // and comment')
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00002760
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks that DISALLOW* macros appear in the private: section.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]  # get rid of comments and strings

  macro = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                 r'DISALLOW_EVIL_CONSTRUCTORS|'
                 r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
  if not macro:
    return

  blocks = nesting_state.stack
  if not blocks:
    return
  innermost = blocks[-1]
  # A DISALLOW* macro found outside a class declaration (or inside a
  # function, when it should have been part of the class declaration) is
  # deliberately not reported: it probably caused a compiler error already.
  if isinstance(innermost, _ClassInfo) and innermost.access != 'private':
    error(filename, linenum, 'readability/constructors', 3,
          '%s must be in the private: section' % macro.group(1))
2790
2791
def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks for the correctness of various spacing issues in the code.

  Things checked here: redundant blank lines at the start or the end of a
  code block, blank lines right after public/protected/private, the
  comment rules of CheckComment, spaces before '[', and spaces around the
  colon of a range-based for loop.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings.
  raw = clean_lines.lines_without_raw_strings
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  #
  # Skip all the blank line checks if we are immediately inside a
  # namespace body.  In other words, don't issue blank line warnings
  # for this block:
  #   namespace {
  #
  #   }
  #
  # A warning about missing end of namespace comments will be issued instead.
  #
  # Also skip blank line checks for 'extern "C"' blocks, which are formatted
  # like namespaces.
  if (IsBlankLine(line) and
      not nesting_state.InNamespaceBody() and
      not nesting_state.InExternC()):
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are indented
      # 4 spaces (because they did not fit in a 80 column line when placed on
      # the same line as the function name).  We also check for the case where
      # the previous line is indented 6 spaces, which may happen when the
      # initializers of a constructor do not fit into a 80 column line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        # The opener is a colon behind exactly four spaces.  It is built
        # from its parts so that the run of spaces cannot be silently
        # collapsed; with a shorter literal the 5-character prefix could
        # never compare equal and this exception would never apply.
        initializer_opener = ' ' * 4 + ':'
        exception = (search_position >= 0
                     and elided[search_position].startswith(
                         initializer_opener))
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have a
        # closing paren, without the opening paren, followed by an opening brace
        # or colon (for initializer lists) we assume that it is the last line of
        # a function header.  If we have a colon indented 4 spaces, it is an
        # initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Redundant blank line at the start of a code block '
              'should be deleted.')
    # Ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Redundant blank line at the end of a code block '
              'should be deleted.')

    matched = Match(r'\s*(public|protected|private):', prev_line)
    if matched:
      error(filename, linenum, 'whitespace/blank_line', 3,
            'Do not leave a blank line after "%s:"' % matched.group(1))

  # Next, check comments.  CheckComment is told where the next line starts;
  # 0 is passed when there is no next line.
  next_line_start = 0
  if linenum + 1 < clean_lines.NumLines():
    next_line = raw[linenum + 1]
    next_line_start = len(next_line) - len(next_line.lstrip())
  CheckComment(line, filename, linenum, next_line_start, error)

  # get rid of comments and strings
  line = clean_lines.elided[linenum]

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'return []() {};'
  if Search(r'\w\s+\[', line) and not Search(r'(?:delete|return)\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # In range-based for, we wanted spaces before and after the colon, but
  # not around "::" tokens that might appear.
  if (Search(r'for *\(.*[^:]:[^: ]', line) or
      Search(r'for *\(.*[^: ]:[^:]', line)):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')
2918
2919
def CheckOperatorSpacing(filename, clean_lines, linenum, error):
  """Checks for horizontal spacing around operators.

  Reports 'whitespace/operators' errors for "=", comparison operators,
  "||", "<", ">", "<<", ">>" and the unary operators "!", "~", "++", "--".

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Operator methods (e.g. "operator==(") must not trigger the spacing
  # checks below.  Overwrite the operator token with underscores, which
  # keeps every other character at its original column.
  #
  # Repeat until nothing matches, so that operators which call other
  # operators on the same line are all masked.
  operator_pattern = r'^(.*\boperator\b)(\S+)(\s*\(.*)$'
  operator_method = Match(operator_pattern, line)
  while operator_method:
    head, token, tail = operator_method.groups()
    line = head + ('_' * len(token)) + tail
    operator_method = Match(operator_pattern, line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Only the case where *both* sides lack a space is
  # flagged, since people sometimes drop the space on one side when
  # aligning ='s across several lines.
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # Spacing around arithmetic operators like + - * / is not checked here.
  # TODO.

  # You should always have whitespace around binary operators.
  #
  # <= and >= are checked first to avoid false positives with < and >;
  # only then are non-include lines checked for spacing around < and >.
  #
  # If the operator is followed by a comma, assume it is being used in a
  # macro context and don't do any checks.  This avoids false positives.
  #
  # Note that && is not included here.  Those are checked separately
  # in CheckRValueReference.
  comparison = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
  if comparison:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % comparison.group(1))
  elif not Match(r'#.*include', line):
    # Look for < that is not surrounded by spaces.  This only triggers
    # when both sides are missing spaces, even though technically we
    # should flag a space missing on either side.  This avoids some
    # false positives with shifts.
    less_than = Match(r'^(.*[^\s<])<[^\s=<,]', line)
    if less_than:
      (_, _, end_pos) = CloseExpression(
          clean_lines, linenum, len(less_than.group(1)))
      # Only complain when CloseExpression reports no closing position.
      if end_pos <= -1:
        error(filename, linenum, 'whitespace/operators', 3,
              'Missing spaces around <')

    # Look for > that is not surrounded by spaces.  As above, we only
    # trigger if both sides are missing spaces to avoid false positives
    # with shifts.
    greater_than = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
    if greater_than:
      (_, _, start_pos) = ReverseCloseExpression(
          clean_lines, linenum, len(greater_than.group(1)))
      if start_pos <= -1:
        error(filename, linenum, 'whitespace/operators', 3,
              'Missing spaces around >')

  # We allow no-spaces around << when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams).
  # We also allow operators following an opening parenthesis, since
  # those tend to be macros that deal with operators.
  left_shift = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<([^\s,=])', line)
  if left_shift:
    before, after = left_shift.group(1), left_shift.group(2)
    follows_paren = before == '('
    between_digits = before.isdigit() and after.isdigit()
    bare_operator = before == 'operator' and after == ';'
    if not (follows_paren or between_digits or bare_operator):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around <<')

  # We allow no-spaces around >> for almost anything.  This is because
  # C++11 allows ">>" to close nested templates, which accounts for
  # most cases when ">>" is not followed by a space.
  #
  # We still warn on ">>" followed by alpha character, because that is
  # likely due to ">>" being used for right shifts, e.g.:
  #   value >> alpha
  #
  # When ">>" is used to close templates, the alphanumeric letter that
  # follows would be part of an identifier, and there should still be
  # a space separating the template type and the identifier.
  #   type<type<type>> alpha
  if Search(r'>>[a-zA-Z_]', line):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around >>')

  # There shouldn't be space around unary operators
  unary = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if unary:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % unary.group(1))
3028
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00003029
def CheckParenthesisSpacing(filename, clean_lines, linenum, error):
  """Checks for horizontal spacing around parentheses.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # if, for, while and switch must be separated from their opening
  # parenthesis by a space.
  keyword = Search(r' (if\(|for\(|while\(|switch\()', line)
  if keyword:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % keyword.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if not match:
    return

  statement = match.group(1)
  leading = len(match.group(2))   # spaces right after "("
  trailing = len(match.group(4))  # spaces right before ")"

  if leading != trailing:
    # "for ( ; ...)": one extra leading space before an empty init.
    empty_init = match.group(3) == ';' and leading == 1 + trailing
    # "for (...; )": no leading space and a space before the final ")".
    if not (empty_init or
            (leading == 0 and Search(r'\bfor\s*\(.*; \)', line))):
      error(filename, linenum, 'whitespace/parens', 5,
            'Mismatching spaces inside () in %s' % statement)
  if leading > 1:
    error(filename, linenum, 'whitespace/parens', 5,
          'Should have zero or one spaces inside ( and ) in %s' %
          statement)
3066
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00003067
def CheckCommaSpacing(filename, clean_lines, linenum, error):
  """Checks for horizontal spacing near commas and semicolons.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  elided_line = clean_lines.elided[linenum]
  raw_line = clean_lines.lines_without_raw_strings[linenum]

  # You should always have a space after a comma (either as fn arg or
  # operator).
  #
  # A comma directly followed by another comma is exempt, since that
  # only happens for empty macro arguments.
  #
  # The check runs in two passes: the elided line must show the missing
  # whitespace, and the raw line must confirm that the gap was not
  # created by eliding a comment.
  comma_without_space = r',[^,\s]'
  if (Search(comma_without_space, elided_line) and
      Search(comma_without_space, raw_line)):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # You should always have a space after a semicolon, except for a few
  # corner cases.
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', elided_line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')
3101
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00003102
def CheckBracesSpacing(filename, clean_lines, linenum, error):
  """Checks for horizontal spacing near braces and trailing semicolons.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Except after an opening paren, or after another opening brace (in case of
  # an initializer list, for instance), you should have spaces before your
  # braces.  And since you should never have braces at the beginning of a
  # line, this is an easy test.
  unspaced_brace = Match(r'^(.*[^ ({]){', line)
  if unspaced_brace:
    # Try a bit harder to check for brace initialization.  This
    # happens in one of the following forms:
    #   Constructor() : initializer_list_{} { ... }
    #   Constructor{}.MemberFunction()
    #   Type variable{};
    #   FunctionCall(type{}, ...);
    #   LastArgument(..., type{});
    #   LOG(INFO) << type{} << " ...";
    #   map_of_type[{...}] = ...;
    #   ternary = expr ? new type{} : nullptr;
    #   OuterTemplate<InnerTemplateConstructor<Type>{}>
    #
    # We check for the character following the closing brace, and
    # silence the warning if it's one of those listed above, i.e.
    # "{.;,)<>]:".
    #
    # To account for nested initializer lists, any number of closing
    # braces may precede that character.  We can't simply silence the
    # warning on first sight of a closing brace, because that would
    # cause false negatives for things that are not initializer lists.
    #   Silence this:         But not this:
    #     Outer{                if (...) {
    #       Inner{...}            if (...){  // Missing space before {
    #     };                    }
    #
    # There is a false negative with this approach if people inserted
    # spurious semicolons, e.g. "if (cond){};", but we will catch the
    # spurious semicolon with a separate check.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(unspaced_brace.group(1)))
    pieces = []
    if endpos > -1:
      pieces.append(endline[endpos:])
    # Append up to two following lines, never including the last line.
    lookahead_stop = min(endlinenum + 3, clean_lines.NumLines() - 1)
    pieces.extend(clean_lines.elided[offset]
                  for offset in xrange(endlinenum + 1, lookahead_stop))
    trailing_text = ''.join(pieces)
    if not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text):
      error(filename, linenum, 'whitespace/braces', 5,
            'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif Search(r'\s+;\s*$', line) and not Search(r'\bfor\b', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use {} instead.')
3180
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00003181
def IsDecltype(clean_lines, linenum, column):
  """Check if the token ending on (linenum, column) is decltype().

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is decltype() expression, False otherwise.
  """
  (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column)
  # A negative start column means no opening position was reported.
  if start_col < 0:
    return False
  # The keyword must sit directly before the reported start column.
  return bool(Search(r'\bdecltype\s*$', text[0:start_col]))
3198
3199
def IsTemplateParameterList(clean_lines, linenum, column):
  """Check if the token ending on (linenum, column) is the end of template<>.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is end of a template parameter list, False otherwise.
  """
  (_, startline, startpos) = ReverseCloseExpression(
      clean_lines, linenum, column)
  if startpos <= -1:
    return False
  # "template" must immediately precede the reported start position.
  text_before = clean_lines.elided[startline][0:startpos]
  return bool(Search(r'\btemplate\s*$', text_before))
3216
3217
def IsRValueType(clean_lines, nesting_state, linenum, column):
  """Check if the token ending on (linenum, column) is a type.

  Assumes that text to the right of the column is "&&" or a function
  name.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is a type, False if we are not sure.
  """
  prefix = clean_lines.elided[linenum][0:column]

  # Get one word to the left.  If we failed to do so, this is most
  # likely not a type, since it's unlikely that the type name and "&&"
  # would be split across multiple lines.
  match = Match(r'^(.*)(\b\w+|[>*)&])\s*$', prefix)
  if not match:
    return False

  # Check text following the token.  If it's "&&>" or "&&," or "&&...", it's
  # most likely a rvalue reference used inside a template.
  suffix = clean_lines.elided[linenum][column:]
  if Match(r'&&\s*(?:[>,]|\.\.\.)', suffix):
    return True

  # Check for simple type and end of templates:
  #   int&& variable
  #   vector<int>&& variable
  #
  # Because this function is called recursively, we also need to
  # recognize pointer and reference types:
  #   int* Function()
  #   int& Function()
  if match.group(2) in ['char', 'char16_t', 'char32_t', 'wchar_t', 'bool',
                        'short', 'int', 'long', 'signed', 'unsigned',
                        'float', 'double', 'void', 'auto', '>', '*', '&']:
    return True

  # If we see a close parenthesis, look for decltype on the other side.
  # decltype would unambiguously identify a type, anything else is
  # probably a parenthesized expression and not a type.
  if match.group(2) == ')':
    return IsDecltype(
        clean_lines, linenum, len(match.group(1)) + len(match.group(2)) - 1)

  # Check for casts and cv-qualifiers.
  #   match.group(1)  remainder
  #   --------------  ---------
  #   const_cast<     type&&
  #   const           type&&
  #   type            const&&
  if Search(r'\b(?:const_cast\s*<|static_cast\s*<|dynamic_cast\s*<|'
            r'reinterpret_cast\s*<|\w+\s)\s*$',
            match.group(1)):
    return True

  # Look for a preceding symbol that might help differentiate the context.
  # These are the cases that would be ambiguous:
  #   match.group(1)  remainder
  #   --------------  ---------
  #   Call         (   expression &&
  #   Declaration  (   type&&
  #   sizeof       (   type&&
  #   if           (   expression &&
  #   while        (   expression &&
  #   for          (   type&&
  #   for(         ;   expression &&
  #   statement    ;   type&&
  #   block        {   type&&
  #   constructor  {   expression &&
  start = linenum
  line = match.group(1)
  match_symbol = None
  while start >= 0:
    # We want to skip over identifiers and commas to get to a symbol.
    # Commas are skipped so that we can find the opening parenthesis
    # for function parameter lists.
    match_symbol = Match(r'^(.*)([^\w\s,])[\w\s,]*$', line)
    if match_symbol:
      break
    start -= 1
    # Stop at the top of the file instead of indexing with -1, which
    # would silently fetch the last line.
    if start >= 0:
      line = clean_lines.elided[start]

  if not match_symbol:
    # Probably the first statement in the file is an rvalue reference
    return True

  if match_symbol.group(2) == '}':
    # Found closing brace, probably an indication of this:
    #   block{} type&&
    return True

  if match_symbol.group(2) == ';':
    # Found semicolon, probably one of these:
    #   for(; expression &&
    #   statement; type&&

    # Look for the previous 'for(' in the previous lines.
    before_text = match_symbol.group(1)
    for i in xrange(start - 1, max(start - 6, 0), -1):
      before_text = clean_lines.elided[i] + before_text
    if Search(r'for\s*\([^{};]*$', before_text):
      # This is the condition inside a for-loop
      return False

    # Did not find a for-init-statement before this semicolon, so this
    # is probably a new statement and not a condition.
    return True

  if match_symbol.group(2) == '{':
    # Found opening brace, probably one of these:
    #   block{ type&& = ... ; }
    #   constructor{ expression && expression }

    # Look for a closing brace or a semicolon.  If we see a semicolon
    # first, this is probably a rvalue reference.  The scan begins with
    # the text of line `start` up to and including the brace, then
    # continues with the lines that follow.
    line = clean_lines.elided[start][0:len(match_symbol.group(1)) + 1]
    end = start
    depth = 1
    while True:
      for ch in line:
        if ch == ';':
          return True
        elif ch == '{':
          depth += 1
        elif ch == '}':
          depth -= 1
          if depth == 0:
            return False
      end += 1
      if end >= clean_lines.NumLines():
        break
      line = clean_lines.elided[end]
    # Incomplete program?
    return False

  if match_symbol.group(2) == '(':
    # Opening parenthesis.  Need to check what's to the left of the
    # parenthesis.  Look back one extra line for additional context.
    # The extra line is taken relative to `start`, the line on which the
    # parenthesis was found, which may be above `linenum`.
    before_text = match_symbol.group(1)
    if start > 1:
      before_text = clean_lines.elided[start - 1] + before_text

    # Patterns that are likely to be types:
    #   [](type&&
    #   for (type&&
    #   sizeof(type&&
    #   operator=(type&&
    #
    if Search(r'(?:\]|\bfor|\bsizeof|\boperator\s*\S+\s*)\s*$', before_text):
      return True

    # Patterns that are likely to be expressions:
    #   if (expression &&
    #   while (expression &&
    #   : initializer(expression &&
    #   , initializer(expression &&
    #   ( FunctionCall(expression &&
    #   + FunctionCall(expression &&
    #   + (expression &&
    #
    # The last '+' represents operators such as '+' and '-'.
    if Search(r'(?:\bif|\bwhile|[-+=%^(<!?:,&*]\s*)$', before_text):
      return False

    # Something else.  Check that tokens to the left look like
    #   return_type function_name
    match_func = Match(r'^(.*)\s+\w(?:\w|::)*(?:<[^<>]*>)?\s*$',
                       match_symbol.group(1))
    if match_func:
      # Check for constructors, which don't have return types.
      if Search(r'\bexplicit$', match_func.group(1)):
        return True
      implicit_constructor = Match(r'\s*(\w+)\((?:const\s+)?(\w+)', prefix)
      if (implicit_constructor and
          implicit_constructor.group(1) == implicit_constructor.group(2)):
        return True
      return IsRValueType(clean_lines, nesting_state, linenum,
                          len(match_func.group(1)))

    # Nothing before the function name.  If this is inside a block scope,
    # this is probably a function call.
    return not (nesting_state.previous_stack_top and
                nesting_state.previous_stack_top.IsBlockInfo())

  if match_symbol.group(2) == '>':
    # Possibly a closing bracket, check that what's on the other side
    # looks like the start of a template.
    return IsTemplateParameterList(
        clean_lines, start, len(match_symbol.group(1)))

  # Some other symbol, usually something like "a=b&&c".  This is most
  # likely not a type.
  return False
3418
3419
def IsRValueAllowed(clean_lines, linenum):
  """Check if RValue reference is allowed within some range of lines.

  Walks backwards from linenum (line 0 is not examined) to the nearest
  line starting with a GOOGLE_ALLOW_RVALUE_REFERENCES_PUSH/POP marker.
  If that line ends with 'PUSH', walks forward from linenum to the next
  marker line and requires it to end with 'POP'.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    True if line is within the region where RValue references are allowed.
  """
  marker = r'GOOGLE_ALLOW_RVALUE_REFERENCES_(?:PUSH|POP)'
  for i in xrange(linenum, 0, -1):
    line = clean_lines.elided[i]
    if not Match(marker, line):
      continue
    if not line.endswith('PUSH'):
      return False
    for j in xrange(linenum, clean_lines.NumLines(), 1):
      line = clean_lines.elided[j]
      if Match(marker, line):
        return line.endswith('POP')
    # The PUSH is never followed by another marker.  Searching further
    # back cannot change the answer: an earlier POP yields False and an
    # earlier PUSH repeats this same forward scan.  Stop here rather
    # than rescanning the rest of the file for every earlier marker.
    return False
  return False
3438 return False
3439
3440
def CheckRValueReference(filename, clean_lines, linenum, nesting_state, error):
  """Check for rvalue references.

  Reports either an unapproved rvalue reference ('build/c++11') or a
  badly spaced boolean "&&" ('whitespace/operators'), depending on
  whether the text to the left of "&&" looks like a type.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Find lines missing spaces around &&.
  # TODO(unknown): currently we don't check for rvalue references
  # with spaces surrounding the && to avoid false positives with
  # boolean expressions.
  line = clean_lines.elided[linenum]
  # First try "&&" with no space on its left, then with none on its right.
  match = Match(r'^(.*\S)&&', line) or Match(r'(.*)&&\S', line)
  if not match:
    return
  if '(&&)' in line or Search(r'\boperator\s*$', match.group(1)):
    return

  # Either poorly formed && or an rvalue reference, check the context
  # to get a more accurate error message.  Mostly we want to determine
  # if what's to the left of "&&" is a type or not.
  and_pos = len(match.group(1))
  if not IsRValueType(clean_lines, nesting_state, linenum, and_pos):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around &&')
  elif not IsRValueAllowed(clean_lines, linenum):
    error(filename, linenum, 'build/c++11', 3,
          'RValue references are an unapproved C++ feature.')
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00003474
3475
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is blank line before
  public/protected/private.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo objects.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  class_start = class_info.starting_linenum

  # Skip checks if the class is small, where small means 25 lines or less.
  # 25 lines seems like a good cutoff since that's the usual height of
  # terminals, and any class that can't fit in one screen can't really
  # be considered "small".
  #
  # Also skip checks if we are on the first line.  This accounts for
  # classes that look like
  #   class Foo { public: ... };
  #
  # If we didn't find the end of the class, last_line would be zero,
  # and the check will be skipped by the first condition.
  if class_info.last_line - class_start <= 24 or linenum <= class_start:
    return

  matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if not matched:
    return

  # No warning is needed when the line before public/protected/private
  # is blank, or when it contains "class" or "struct".  The latter can
  # happen two ways:
  #  - We are at the beginning of the class.
  #  - We are forward-declaring an inner class that is semantically
  #    private, but needed to be public for implementation reasons.
  # Also ignores cases where the previous line ends with a backslash as can
  # be common when defining classes in C macros.
  prev_line = clean_lines.lines[linenum - 1]
  if (IsBlankLine(prev_line) or
      Search(r'\b(class|struct)\b', prev_line) or
      Search(r'\\$', prev_line)):
    return

  # Try a bit harder to find the beginning of the class.  This is to
  # account for multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  # Take the first line ending in "{", or the class start if none does.
  end_class_head = next(
      (i for i in range(class_start, linenum)
       if Search(r'\{\s*$', clean_lines.lines[i])),
      class_start)
  if end_class_head < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % matched.group(1))
3529
3530
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Return the most recent non-blank line and its line number.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A tuple with two elements.  The first element is the contents of the last
    non-blank line before the current line, or the empty string if this is the
    first non-blank line.  The second is the line number of that line, or -1
    if this is the first non-blank line.
  """
  # Step upwards over blank lines, starting just above linenum.
  candidate = linenum - 1
  while candidate >= 0 and IsBlankLine(clean_lines.elided[candidate]):
    candidate -= 1

  if candidate < 0:
    # Ran off the top of the file without finding any content.
    return ('', -1)
  return (clean_lines.elided[candidate], candidate)
3552
3553
3554def CheckBraces(filename, clean_lines, linenum, error):
3555 """Looks for misplaced braces (e.g. at the end of line).
3556
3557 Args:
3558 filename: The name of the current file.
3559 clean_lines: A CleansedLines instance containing the file.
3560 linenum: The number of the line to check.
3561 error: The function to call with any errors found.
3562 """
3563
3564 line = clean_lines.elided[linenum] # get rid of comments and strings
3565
3566 if Match(r'\s*{\s*$', line):
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +00003567 # We allow an open brace to start a line in the case where someone is using
3568 # braces in a block to explicitly create a new scope, which is commonly used
3569 # to control the lifetime of stack-allocated variables. Braces are also
3570 # used for brace initializers inside function calls. We don't detect this
3571 # perfectly: we just don't complain if the last non-whitespace character on
3572 # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
3573 # previous line starts a preprocessor block.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00003574 prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +00003575 if (not Search(r'[,;:}{(]\s*$', prevline) and
mazda@chromium.org3fffcec2013-06-07 01:04:53 +00003576 not Match(r'\s*#', prevline)):
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00003577 error(filename, linenum, 'whitespace/braces', 4,
3578 '{ should almost always be at the end of the previous line')
3579
3580 # An else clause should be on the same line as the preceding closing brace.
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00003581 if Match(r'\s*else\b\s*(?:if\b|\{|$)', line):
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00003582 prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
3583 if Match(r'\s*}\s*$', prevline):
3584 error(filename, linenum, 'whitespace/newline', 4,
3585 'An else should appear on the same line as the preceding }')
3586
3587 # If braces come on one side of an else, they should be on both.
3588 # However, we have to worry about "else if" that spans multiple lines!
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00003589 if Search(r'else if\s*\(', line): # could be multi-line if
3590 brace_on_left = bool(Search(r'}\s*else if\s*\(', line))
3591 # find the ( after the if
3592 pos = line.find('else if')
3593 pos = line.find('(', pos)
3594 if pos > 0:
3595 (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
3596 brace_on_right = endline[endpos:].find('{') != -1
3597 if brace_on_left != brace_on_right: # must be brace after if
3598 error(filename, linenum, 'readability/braces', 5,
3599 'If an else has a brace on one side, it should have it on both')
3600 elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
3601 error(filename, linenum, 'readability/braces', 5,
3602 'If an else has a brace on one side, it should have it on both')
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00003603
3604 # Likewise, an else should never have the else clause on the same line
3605 if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
3606 error(filename, linenum, 'whitespace/newline', 4,
3607 'Else clause should never be on same line as else (use 2 lines)')
3608
3609 # In the same way, a do/while should never be on one line
3610 if Match(r'\s*do [^\s{]', line):
3611 error(filename, linenum, 'whitespace/newline', 4,
3612 'do/while clauses should not be on a single line')
3613
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00003614 # Check single-line if/else bodies. The style guide says 'curly braces are not
3615 # required for single-line statements'. We additionally allow multi-line,
3616 # single statements, but we reject anything with more than one semicolon in
3617 # it. This means that the first semicolon after the if should be at the end of
3618 # its line, and the line after that should have an indent level equal to or
3619 # lower than the if. We also check for ambiguous if/else nesting without
3620 # braces.
3621 if_else_match = Search(r'\b(if\s*\(|else\b)', line)
3622 if if_else_match and not Match(r'\s*#', line):
3623 if_indent = GetIndentLevel(line)
3624 endline, endlinenum, endpos = line, linenum, if_else_match.end()
3625 if_match = Search(r'\bif\s*\(', line)
3626 if if_match:
3627 # This could be a multiline if condition, so find the end first.
3628 pos = if_match.end() - 1
3629 (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos)
3630 # Check for an opening brace, either directly after the if or on the next
3631 # line. If found, this isn't a single-statement conditional.
3632 if (not Match(r'\s*{', endline[endpos:])
3633 and not (Match(r'\s*$', endline[endpos:])
3634 and endlinenum < (len(clean_lines.elided) - 1)
3635 and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))):
3636 while (endlinenum < len(clean_lines.elided)
3637 and ';' not in clean_lines.elided[endlinenum][endpos:]):
3638 endlinenum += 1
3639 endpos = 0
3640 if endlinenum < len(clean_lines.elided):
3641 endline = clean_lines.elided[endlinenum]
3642 # We allow a mix of whitespace and closing braces (e.g. for one-liner
3643 # methods) and a single \ after the semicolon (for macros)
3644 endpos = endline.find(';')
3645 if not Match(r';[\s}]*(\\?)$', endline[endpos:]):
3646 # Semicolon isn't the last character, there's something trailing
3647 error(filename, linenum, 'readability/braces', 4,
3648 'If/else bodies with multiple statements require braces')
3649 elif endlinenum < len(clean_lines.elided) - 1:
3650 # Make sure the next line is dedented
3651 next_line = clean_lines.elided[endlinenum + 1]
3652 next_indent = GetIndentLevel(next_line)
3653 # With ambiguous nested if statements, this will error out on the
3654 # if that *doesn't* match the else, regardless of whether it's the
3655 # inner one or outer one.
3656 if (if_match and Match(r'\s*else\b', next_line)
3657 and next_indent != if_indent):
3658 error(filename, linenum, 'readability/braces', 4,
3659 'Else clause should be indented at the same level as if. '
3660 'Ambiguous nested if/else chains require braces.')
3661 elif next_indent > if_indent:
3662 error(filename, linenum, 'readability/braces', 4,
3663 'If/else bodies with multiple statements require braces')
3664
3665
def CheckTrailingSemicolon(filename, clean_lines, linenum, error):
  """Looks for redundant trailing semicolon.

  The line is examined for an opening brace that starts a block (as opposed
  to a brace initializer).  If one is found, the matching closing brace is
  located and a 'readability/braces' error is reported when that brace is
  immediately followed by a semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]

  # Block bodies should not be followed by a semicolon.  Due to C++11
  # brace initialization, there are more places where semicolons are
  # required than not, so we use a whitelist approach to check these
  # rather than a blacklist.  These are the places where "};" should
  # be replaced by just "}":
  # 1. Some flavor of block following closing parenthesis:
  #    for (;;) {};
  #    while (...) {};
  #    switch (...) {};
  #    Function(...) {};
  #    if (...) {};
  #    if (...) else if (...) {};
  #
  # 2. else block:
  #    if (...) else {};
  #
  # 3. const member function:
  #    Function(...) const {};
  #
  # 4. Block following some statement:
  #    x = 42;
  #    {};
  #
  # 5. Block at the beginning of a function:
  #    Function(...) {
  #      {};
  #    }
  #
  #    Note that naively checking for the preceding "{" will also match
  #    braces inside multi-dimensional arrays, but this is fine since
  #    that expression will not contain semicolons.
  #
  # 6. Block following another block:
  #    while (true) {}
  #    {};
  #
  # 7. End of namespaces:
  #    namespace {};
  #
  #    These semicolons seem far more common than other kinds of
  #    redundant semicolons, possibly due to people converting classes
  #    to namespaces.  For now we do not warn for this case.
  #
  # Try matching case 1 first.  group(1) is everything up to (but not
  # including) the opening brace, so len(group(1)) is the brace's column.
  match = Match(r'^(.*\)\s*)\{', line)
  if match:
    # Matched closing parenthesis (case 1).  Check the token before the
    # matching opening parenthesis, and don't warn if it looks like a
    # macro.  This avoids these false positives:
    #  - macro that defines a base class
    #  - multi-line macro that defines a base class
    #  - macro that defines the whole class-head
    #
    # But we still issue warnings for macros that we know are safe to
    # warn, specifically:
    #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
    #  - TYPED_TEST
    #  - INTERFACE_DEF
    #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
    #
    # NOTE(review): TEST_P is listed above but is absent from the tuple in
    # the condition below, so "TEST_P(...) {};" is currently treated as an
    # unknown macro and not warned about -- confirm which one is intended.
    #
    # We implement a whitelist of safe macros instead of a blacklist of
    # unsafe macros, even though the latter appears less frequently in
    # google code and would have been easier to implement.  This is because
    # the downside for getting the whitelist wrong means some extra
    # semicolons, while the downside for getting the blacklist wrong
    # would result in compile errors.
    #
    # In addition to macros, we also don't want to warn on compound
    # literals and lambdas.
    #
    # Despite its name, closing_brace_pos is the column of the last ')'
    # before the opening brace.
    closing_brace_pos = match.group(1).rfind(')')
    # The result is used below as a 3-item sequence: [0] is sliced as line
    # text, [1] indexes clean_lines.elided, and [2] is a column that is
    # compared against -1 (presumably "not found" -- verify against
    # ReverseCloseExpression).
    opening_parenthesis = ReverseCloseExpression(
        clean_lines, linenum, closing_brace_pos)
    if opening_parenthesis[2] > -1:
      # Text preceding the matching '(' on the line where it was found.
      line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
      # An all-caps identifier directly before '(' is taken to be a macro.
      macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
      # A ']' directly before '(' looks like a lambda capture list.
      func = Match(r'^(.*\])\s*$', line_prefix)
      if ((macro and
           macro.group(1) not in (
               'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
               'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
               'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
          # "operator[](...) {}" also ends in ']' but is a real function.
          (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or
          # "= (...) {" : an '=' before the parenthesis (compound literal).
          Search(r'\s+=\s*$', line_prefix)):
        match = None
    if (match and
        opening_parenthesis[1] > 1 and
        Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])):
      # Multi-line lambda-expression: the capture list's ']' ends the line
      # just above the one holding the opening parenthesis.
      match = None

  else:
    # Try matching cases 2-3.
    match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
    if not match:
      # Try matching cases 4-6.  These are always matched on separate lines.
      #
      # Note that we can't simply concatenate the previous line to the
      # current line and do a single match, otherwise we may output
      # duplicate warnings for the blank line case:
      #   if (cond) {
      #     // blank line
      #   }
      prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
      if prevline and Search(r'[;{}]\s*$', prevline):
        match = Match(r'^(\s*)\{', line)

  # Check matching closing brace
  if match:
    # len(match.group(1)) is the column of the opening brace in every
    # pattern above, since each one captures the text before '{'.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(match.group(1)))
    if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
      # Current {} pair is eligible for semicolon check, and we have found
      # the redundant semicolon, output warning here.
      #
      # Note: because we are scanning forward for opening braces, and
      # outputting warnings for the matching closing brace, if there are
      # nested blocks with trailing semicolons, we will get the error
      # messages in reversed order.
      error(filename, endlinenum, 'readability/braces', 4,
            "You don't need a ; after a }")
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00003798
3799
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Look for empty loop/conditional body with only a single semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Only lines that *begin* (after optional whitespace) with a loop or "if"
  # keyword are considered.  This also skips most do-while loops, whose
  # "while" normally follows a closing brace on the same line.  "if" is
  # included because an empty conditional body is likely an error.
  keyword_match = Match(r'\s*(for|while|if)\s*\(', line)
  if not keyword_match:
    return

  # Locate the end of the parenthesized condition, starting from the first
  # '(' on the line.
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Only a semicolon placed directly after the condition is reported here.
  # Whitespace-then-semicolon or a newline is left to the separate check
  # for semicolons preceded by whitespace.
  if end_pos < 0 or not Match(r';', end_line[end_pos:]):
    return

  if keyword_match.group(1) == 'if':
    category = 'whitespace/empty_conditional_body'
    message = 'Empty conditional bodies should use {}'
  else:
    category = 'whitespace/empty_loop_body'
    message = 'Empty loop bodies should use {} or continue'
  error(filename, end_linenum, category, 5, message)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00003833
3834
def FindCheckMacro(line):
  """Find a replaceable CHECK-like macro.

  Args:
    line: line to search on.
  Returns:
    (macro name, start position), or (None, -1) if no replaceable
    macro is found.  The start position is the column of the opening
    parenthesis that follows the macro name.
  """
  for candidate in _CHECK_MACROS:
    # Cheap substring test first; skip the regex when the name is absent.
    if line.find(candidate) < 0:
      continue
    # Confirm with a word-boundary match so that we pick up the expected
    # CHECK macro and not some other macro that merely contains the CHECK
    # substring.  The captured prefix ends right before the '('.
    hit = Match(r'^(.*\b' + candidate + r'\s*)\(', line)
    if hit:
      return (candidate, len(hit.group(1)))
  return (None, -1)
3856
3857
def CheckCheck(filename, clean_lines, linenum, error):
  """Checks the use of CHECK and EXPECT macros.

  When a CHECK-like macro wraps a single relational comparison in which at
  least one operand is a literal constant, a 'readability/check' error is
  reported suggesting the comparison-specific replacement macro.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Decide the set of replacement macros that should be suggested
  lines = clean_lines.elided
  # start_pos is the column of the '(' following the macro name.
  (check_macro, start_pos) = FindCheckMacro(lines[linenum])
  if not check_macro:
    return

  # Find end of the boolean expression by matching parentheses
  (last_line, end_line, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return
  # Collect the text strictly between the macro's parentheses.  The
  # "end_pos - 1" slices assume end_pos is the column just past the closing
  # ')' -- TODO confirm against CloseExpression.
  if linenum == end_line:
    expression = lines[linenum][start_pos + 1:end_pos - 1]
  else:
    # The expression spans several lines: first-line remainder, all whole
    # lines in between, then the last line up to the closing parenthesis.
    expression = lines[linenum][start_pos + 1:]
    for i in xrange(linenum + 1, end_line):
      expression += lines[i]
    expression += last_line[0:end_pos - 1]

  # Parse expression so that we can take parentheses into account.
  # This avoids false positives for inputs like "CHECK((a < 4) == b)",
  # which is not replaceable by CHECK_LE.
  lhs = ''
  rhs = ''
  operator = None
  while expression:
    # Alternatives are tried left to right, so '<<' and '>>' win over
    # '<<=' and '>>='; the leftover '=' is then consumed as an ordinary
    # character by the operand branch below and still ends up in lhs.
    matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
                    r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if matched:
      token = matched.group(1)
      if token == '(':
        # Parenthesized operand
        expression = matched.group(2)
        (end, _) = FindEndOfExpressionInLine(expression, 0, ['('])
        if end < 0:
          return  # Unmatched parenthesis
        # Copy the whole parenthesized group into lhs verbatim so that
        # operators inside it are never treated as the top-level operator.
        lhs += '(' + expression[0:end]
        expression = expression[end:]
      elif token in ('&&', '||'):
        # Logical and/or operators.  This means the expression
        # contains more than one term, for example:
        #   CHECK(42 < a && a < b);
        #
        # These are not replaceable with CHECK_LE, so bail out early.
        return
      elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
        # Non-relational operator
        lhs += token
        expression = matched.group(2)
      else:
        # Relational operator: everything after it becomes the right-hand
        # side and parsing stops.
        operator = token
        rhs = matched.group(2)
        break
    else:
      # Unparenthesized operand.  Instead of appending to lhs one character
      # at a time, we do another regular expression match to consume several
      # characters at once if possible.  Trivial benchmark shows that this
      # is more efficient when the operands are longer than a single
      # character, which is generally the case.
      matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
      if not matched:
        # The next character is one of the excluded operator characters but
        # did not form a recognized token; consume just that character.
        matched = Match(r'^(\s*\S)(.*)$', expression)
        if not matched:
          break
      lhs += matched.group(1)
      expression = matched.group(2)

  # Only apply checks if we got all parts of the boolean expression
  if not (lhs and operator and rhs):
    return

  # Check that rhs do not contain logical operators.  We already know
  # that lhs is fine since the loop above parses out && and ||.
  if rhs.find('&&') > -1 or rhs.find('||') > -1:
    return

  # At least one of the operands must be a constant literal.  This is
  # to avoid suggesting replacements for unprintable things like
  #   CHECK(variable != iterator)
  #
  # The following pattern matches decimal, hex integers, strings, and
  # characters (in that order).
  lhs = lhs.strip()
  rhs = rhs.strip()
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if Match(match_constant, lhs) or Match(match_constant, rhs):
    # Note: since we know both lhs and rhs, we can provide a more
    # descriptive error message like:
    #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
    # Instead of:
    #   Consider using CHECK_EQ instead of CHECK(a == b)
    #
    # We are still keeping the less descriptive message because if lhs
    # or rhs gets long, the error message might become unreadable.
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[check_macro][operator],
              check_macro, operator))
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00003967
3968
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Check alternative keywords being used in boolean expressions.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Preprocessor lines are never checked.
  if Match(r'^\s*#', line):
    return

  # Last ditch effort to avoid multi-line comments.  A line holding either
  # comment delimiter is skipped outright.  This does not help when the
  # comment started before or ends after the current line, but it catches
  # most of the false positives and gives people who use multi-line
  # comments in preprocessor macros a way to work around this warning.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  # One error per alternative token occurrence, in left-to-right order.
  for alt_use in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    alt_token = alt_use.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
3999
4000
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  Non-text input (a Python 2
    byte `str`, or `bytes` on Python 3) is measured with plain len().
  """
  # Python 2 has a separate `unicode` type.  On Python 3 that name does not
  # exist (a bare reference raises NameError) and every `str` is already
  # Unicode text, so resolve the text type in a way that works on both.
  try:
    text_type = unicode  # pylint: disable=undefined-variable
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # NFC first, so that a base character plus combining mark that has a
  # precomposed form is counted as a single code point.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and Fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Remaining combining marks occupy no column of their own.
      width += 1
  return width
4021
4022
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings,
  raw_lines = clean_lines.lines_without_raw_strings
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$'
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace.  Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for
  # section labels, and also lines containing multi-line raw strings.
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(scope_or_label_pattern, cleansed_line) and
        not (clean_lines.raw_lines[linenum] != line and
             Match(r'^\s*""', line))):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start.  '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developer's fault.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
    line_width = GetLineWidth(line)
    # Lines past the normal limit get a mild warning; lines more than 25%
    # over it get a stronger one.
    extended_length = int(_line_length * 1.25)
    if line_width > extended_length:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  # More than one ';' on a line usually means several statements were
  # squeezed together.  Lines mentioning 'for' are exempt outright, since
  # for loops are allowed two ;'s.
  if cleansed_line.count(';') > 1 and cleansed_line.find('for') == -1:
    # Fetch the previous non-blank line once; it is consulted twice below.
    prev_line = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # A for loop may run over two lines: the previous line mentions 'for'
    # but holds no ';' yet, so this line carries the loop's semicolons.
    continues_for_header = (prev_line.find('for') != -1 and
                            prev_line.find(';') == -1)
    # It's ok to have many commands in a switch case that fits in 1 line
    one_line_switch_case = ((cleansed_line.find('case ') != -1 or
                             cleansed_line.find('default:') != -1) and
                            cleansed_line.find('break;') != -1)
    if not continues_for_header and not one_line_switch_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckTrailingSemicolon(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckOperatorSpacing(filename, clean_lines, linenum, error)
  CheckParenthesisSpacing(filename, clean_lines, linenum, error)
  CheckCommaSpacing(filename, clean_lines, linenum, error)
  CheckBracesSpacing(filename, clean_lines, linenum, error)
  CheckSpacingForFunctionCall(filename, clean_lines, linenum, error)
  CheckRValueReference(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  # Section spacing only applies when the line is inside a class.
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00004140
4141
# Matches a quoted #include of a "*.h" file whose path contains no '/',
# i.e. a header named without any directory component.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  group(1) is the opening delimiter
# ('<' or '"') and group(2) is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
4150
4151
def _DropCommonSuffixes(filename):
  """Drops common suffixes like _test.cc or -inl.h from filename.

  A suffix is only dropped when it is joined to the rest of the name by a
  '-' or '_'; otherwise just the file extension is removed.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  joiners = ('-', '_')
  known_tails = ('test.cc', 'regtest.cc', 'unittest.cc',
                 'inl.h', 'impl.h', 'internal.h')
  for tail in known_tails:
    # Index of the character sitting immediately before the suffix; it is
    # negative when the filename is not longer than the suffix itself.
    joiner_at = len(filename) - len(tail) - 1
    if (joiner_at >= 0 and filename.endswith(tail) and
        filename[joiner_at] in joiners):
      return filename[:joiner_at]
  stem, _ = os.path.splitext(filename)
  return stem
4177
4178
def _IsTestFilename(filename):
  """Determines if the given filename has a suffix that identifies it as a test.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' looks like a test, False otherwise.
  """
  # str.endswith accepts a tuple and reports whether any entry matches.
  test_suffixes = ('_test.cc', '_unittest.cc', '_regtest.cc')
  return filename.endswith(test_suffixes)
4194
4195
def _ClassifyInclude(fileinfo, include, is_system):
  """Figures out what kind of header 'include' is.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are system headers: C++ if the name is one of
  # the known C++ headers, C otherwise.
  if is_system:
    return _CPP_SYS_HEADER if include in _CPP_HEADERS else _C_SYS_HEADER

  # Strip common suffixes from both paths, then split each into a directory
  # part and a base name.
  target_dir, target_base = os.path.split(
      _DropCommonSuffixes(fileinfo.RepositoryName()))
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))

  # Same base name, and the include lives either next to the target or in
  # the sibling "public" directory: likely owned by the target file.
  if target_base == include_base:
    if include_dir == target_dir:
      return _LIKELY_MY_HEADER
    if include_dir == os.path.normpath(target_dir + '/../public'):
      return _LIKELY_MY_HEADER

  # If the target and include share some initial basename component, it's
  # possible the target is implementing the include, so it's allowed to be
  # first, but we'll never complain if it's not there.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if not (target_head and include_head):
    return _OTHER_HEADER
  if target_head.group(0) == include_head.group(0):
    return _POSSIBLE_MY_HEADER
  return _OTHER_HEADER
4253
4254
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00004255
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Runs the checks that apply specifically to #include lines.

  The line is taken from clean_lines.lines, where string contents are kept,
  so the included path is still visible.  Three things are verified: the
  include names a directory, the header is not included twice and appears in
  the expected order, and stream headers are reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  info = FileInfo(filename)
  text = clean_lines.lines[linenum]

  # Prefer the new style "foo/bar.h" over a bare "bar.h".
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(text):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # Including the same file twice is occasionally fine, but in general
  # it is a mistake.
  found = _RE_PATTERN_INCLUDE.search(text)
  if found:
    header = found.group(2)
    from_system = found.group(1) == '<'
    if header in include_state:
      duplicate_message = ('"%s" already included at %s:%s' %
                           (header, filename, include_state[header]))
      error(filename, linenum, 'build/include', 4, duplicate_message)
    else:
      include_state[header] = linenum

      # Headers are expected in this order:
      # 1) for foo.cc, foo.h  (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h  (deprecated location)
      # 5) other google headers
      #
      # Every include is classified as one of those 5 kinds.  include_state
      # remembers the highest kind seen so far and returns a message when a
      # lower kind shows up afterwards.
      header_kind = _ClassifyInclude(info, header, from_system)
      order_problem = include_state.CheckNextIncludeOrder(header_kind)
      if order_problem:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (order_problem, info.BaseName()))
      canonical = include_state.CanonicalizeAlphabeticalOrder(header)
      alphabetical = include_state.IsInAlphabeticalOrder(
          clean_lines, linenum, canonical)
      if not alphabetical:
        error(filename, linenum, 'build/include_alpha', 4,
              'Include "%s" not in alphabetical order' % header)
      include_state.SetLastHeader(canonical)

  # Look for any of the stream classes that are part of standard C++.
  found = _RE_PATTERN_INCLUDE.match(text)
  if found:
    header = found.group(2)
    # Many unit tests use cout, so test files are exempt.
    if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', header) and
        not _IsTestFilename(filename)):
      if header == 'ostream':
        # Suggest a different header for ostream.
        stream_message = (
            'For logging, include "base/logging.h" instead of <ostream>.')
      else:
        stream_message = 'Streams are highly discouraged.'
      error(filename, linenum, 'readability/streams', 3, stream_message)
erg@google.com6317a9c2009-06-25 00:28:19 +00004330
asvitkine@chromium.org8b8d8be2011-09-08 15:34:45 +00004331
4332def _GetTextInside(text, start_pattern):
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +00004333 r"""Retrieves all the text between matching open and close parentheses.
asvitkine@chromium.org8b8d8be2011-09-08 15:34:45 +00004334
4335 Given a string of lines and a regular expression string, retrieve all the text
4336 following the expression and between opening punctuation symbols like
4337 (, [, or {, and the matching close-punctuation symbol. This properly nested
4338 occurrences of the punctuations, so for the text like
4339 printf(a(), b(c()));
4340 a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
4341 start_pattern must match string having an open punctuation symbol at the end.
4342
4343 Args:
4344 text: The lines to extract text. Its comments and strings must be elided.
4345 It can be single line and can span multiple lines.
4346 start_pattern: The regexp string indicating where to start extracting
4347 the text.
4348 Returns:
4349 The extracted text.
4350 None if either the opening string or ending punctuation could not be found.
4351 """
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00004352 # TODO(unknown): Audit cpplint.py to see what places could be profitably
asvitkine@chromium.org8b8d8be2011-09-08 15:34:45 +00004353 # rewritten to use _GetTextInside (and use inferior regexp matching today).
4354
4355 # Give opening punctuations to get the matching close-punctuations.
4356 matching_punctuation = {'(': ')', '{': '}', '[': ']'}
4357 closing_punctuation = set(matching_punctuation.itervalues())
4358
4359 # Find the position to start extracting text.
4360 match = re.search(start_pattern, text, re.M)
4361 if not match: # start_pattern not found in text.
4362 return None
4363 start_position = match.end(0)
4364
4365 assert start_position > 0, (
4366 'start_pattern must ends with an opening punctuation.')
4367 assert text[start_position - 1] in matching_punctuation, (
4368 'start_pattern must ends with an opening punctuation.')
4369 # Stack of closing punctuations we expect to have in text after position.
4370 punctuation_stack = [matching_punctuation[text[start_position - 1]]]
4371 position = start_position
4372 while punctuation_stack and position < len(text):
4373 if text[position] == punctuation_stack[-1]:
4374 punctuation_stack.pop()
4375 elif text[position] in closing_punctuation:
4376 # A closing punctuation without matching opening punctuations.
4377 return None
4378 elif text[position] in matching_punctuation:
4379 punctuation_stack.append(matching_punctuation[text[position]])
4380 position += 1
4381 if punctuation_stack:
4382 # Opening punctuations left without matching close-punctuations.
4383 return None
4384 # punctuations match.
4385 return text[start_position:position - 1]
4386
4387
# Patterns for matching call-by-reference parameters.
#
# Supports nested templates up to 2 levels deep using this messy pattern:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
# An identifier: a letter or underscore followed by word characters.
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
# A type name: optional leading 'const', optional elaborated-type keyword,
# then any run of word characters, template argument lists (nested as
# described above) and '::' scope separators.
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'(?:\w|'
    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
    r'::)+')
# A call-by-reference parameter ends with '& identifier'.
# Group 1 captures the whole parameter (type, qualifiers, '&' and name); an
# optional default value may follow before the closing ',' or ')'.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
# Kept as a plain pattern string (not compiled).
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
4412
4413
def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  #include lines are handed to CheckIncludeLine and receive no further
  checks here.  Every other non-empty line goes through the cast, global
  static and printf checks, followed by the inline checks below.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.  It is
                   not referenced by this function body.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # Reset include state across preprocessor directives.  This is meant
  # to silence warnings for conditional includes.
  if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
    include_state.ResetSection()

  # Perform other checks now that we are sure that this is not an include line
  CheckCasts(filename, clean_lines, linenum, error)
  CheckGlobalStatic(filename, clean_lines, linenum, error)
  CheckPrintf(filename, clean_lines, linenum, error)

  # Header-file ('h') checks that are not implemented here:
  # TODO(unknown): check that 1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
  #                (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous.  Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(unknown): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    # A lone expression made of identifier/member-access/call characters
    # means the caller passed a non-literal as the format string.
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  # The second argument is accepted as a fill value only when it is, in its
  # entirety, a character literal (elided to ''), a decimal integer or a hex
  # integer.  The alternatives are grouped and anchored at the end so that an
  # expression which merely starts with a digit (e.g. "4096 * sizeof(x)") is
  # still reported.
  if match and not Match(r"(?:''|-?[0-9]+|0[xX][0-9A-Fa-f]+)\s*$",
                         match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives.  '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.  '>>' is a delimiter just like '<<'.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok):
        continue
      if Search(r'arraysize\(\w+\)', tok):
        continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok:
        continue
      if Match(r'\d+', tok):
        continue
      if Match(r'0[xX][0-9a-fA-F]+', tok):
        continue
      if Match(r'k[A-Z0-9]\w*', tok):
        continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok):
        continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
        continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays.  Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class.  The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays.  It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files.  See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
4589
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00004590
def CheckGlobalStatic(filename, clean_lines, linenum, error):
  """Reports unsafe global/static string objects and self-initialization.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # Static/global STL strings declared at the top level are dangerous: the
  # C++ language does not guarantee that globals with constructors are
  # initialized before the first access.
  declaration = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      text)
  if declaration:
    # Each test below recognizes one family of false positives.
    #
    # - String pointers (as opposed to values).
    #     string *pointer
    #     const string *pointer
    #     string const *pointer
    #     string *const pointer
    #
    # - Operators.  These are matched separately because operator names
    #   cross non-word boundaries, and trying to match both operators
    #   and functions at the same time would decrease accuracy of
    #   matching identifiers.
    #     string Class::operator*()
    #
    # - Functions and template specializations.
    #     string Function<Type>(...
    #     string Class<Type>::Method(...
    is_false_positive = (
        Search(r'\bstring\b(\s+const)?\s*\*\s*(const\s+)?\w', text) or
        Search(r'\boperator\W', text) or
        Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
              declaration.group(3)))
    if not is_false_positive:
      qualifiers = declaration.group(1)
      identifier = declaration.group(2)
      error(filename, linenum, 'runtime/string', 4,
            'For a static/global string constant, use a C style string instead: '
            '"%schar %s[]".' %
            (qualifiers, identifier))

  # A name ending in an underscore that is initialized from itself, as in
  # "foo_(foo_)".
  self_initialized = Search(r'\b([A-Za-z0-9_]*_)\(\1\)', text)
  if self_initialized:
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')
4636
4637
def CheckPrintf(filename, clean_lines, linenum, error):
  """Reports risky uses of the printf family and of strcpy/strcat.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # The second argument of snprintf shouldn't be a literal.  A literal zero
  # is fine: in that case snprintf is used to calculate the size.
  snprintf_call = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', text)
  if snprintf_call:
    buffer_arg, size_arg = snprintf_call.group(1), snprintf_call.group(2)
    if size_arg != '0':
      error(filename, linenum, 'runtime/printf', 3,
            'If you can, use sizeof(%s) instead of %s as the 2nd arg '
            'to snprintf.' % (buffer_arg, size_arg))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', text):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf.  Use snprintf instead.')
  unsafe_call = Search(r'\b(strcpy|strcat)\b', text)
  if unsafe_call:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % unsafe_call.group(1))
4665
4666
def IsDerivedFunction(clean_lines, linenum):
  """Tells whether the current line declares a function marked "override".

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    A true value if current line contains a function with "override"
    virt-specifier, a false value otherwise.
  """
  # Start from the leftmost opening parenthesis on the current line.
  paren_start = clean_lines.elided[linenum].find('(')
  if paren_start < 0:
    return False

  # "override" is only looked for after the matching closing parenthesis.
  closed_text, _, paren_end = CloseExpression(
      clean_lines, linenum, paren_start)
  if paren_end < 0:
    return False
  return Search(r'\boverride\b', closed_text[paren_end:])
4684
4685
def IsInitializerList(clean_lines, linenum):
  """Guesses whether a line sits inside a constructor initializer list.

  Walks backwards from the current line until a line either looks like the
  start of an initializer list or looks like the end of some other construct.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    True if current line appears to be inside constructor initializer
    list, False otherwise.
  """
  # NOTE(review): the walk stops before index 1, so elided[1] is never
  # examined -- confirm whether that is intended.
  index = linenum
  while index > 1:
    text = clean_lines.elided[index]
    if index == linenum:
      # On the starting line only, ignore a trailing '{' so that it is not
      # mistaken for a block opener by the test further down.
      before_brace = Match(r'^(.*)\{\s*$', text)
      if before_brace:
        text = before_brace.group(1)

    # Either of these indicates an initializer list:
    # - A lone colon, which tends to start a constructor initializer list.
    #   It could also be a ternary operator, which also tends to appear in
    #   constructor initializer lists as opposed to parameter lists.
    # - A closing brace followed by a comma, probably the end of a
    #   brace-initialized member in a constructor initializer list.
    if Search(r'\s:\s*\w+[({]', text) or Search(r'\}\s*,\s*$', text):
      return True

    # Found one of the following:
    # - A closing brace or semicolon, probably the end of the previous
    #   function.
    # - An opening brace, probably the start of current class or namespace.
    #
    # Current line is probably not inside an initializer list since
    # we saw one of those things without seeing the starting colon.
    if Search(r'[{};]\s*$', text):
      return False

    index -= 1

  # Ran out of lines without seeing the start of a constructor
  # initializer list.
  return False
4726
4727
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Nothing is reported for lines without '&', for functions marked
  "override", for lines inside a function body, for preprocessor
  continuation lines, for constructor initializer lists, or for the
  whitelisted functions listed below.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # If a function is inherited, current function doesn't have much of
  # a choice, so any non-const references should not be blamed on
  # derived function.
  if IsDerivedFunction(clean_lines, linenum):
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive.  If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(
              piece.strip()
              for piece in clean_lines.elided[startline:linenum + 1])

  # Check for non-const references in function parameters.  A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  if (nesting_state.previous_stack_top and
      not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or
           isinstance(nesting_state.previous_stack_top, _NamespaceInfo))):
    # Not at toplevel, not within a class, and not within a namespace
    return

  # Avoid preprocessors
  if Search(r'\\\s*$', line):
    return

  # Avoid constructor initializer lists
  if IsInitializerList(clean_lines, linenum):
    return

  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  #
  # The optional template argument after swap is any run of word characters
  # and colons, so that both swap<Foo>( and swap<std::string>( are accepted.
  whitelisted_functions = (r'(?:[sS]wap(?:<[\w:]+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    return
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case.
    for i in xrange(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        return

  decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
  for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
    if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer: ' +
            ReplaceAll(' *<', '<', parameter))
4845
4846
def CheckCasts(filename, clean_lines, linenum, error):
  """Various cast related checks.

  Three kinds of problems are looked for on the elided line: function-style
  casts to basic types, C-style casts (delegated to CheckCStyleCast), and
  taking the address of a cast.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Check to see if they're using a conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  #
  # Group 1: optional "new " or "<" prefix; group 2: the basic type name;
  # group 3: the parenthesized text that follows the type.
  match = Search(
      r'(\bnew\s+|\S<\s*(?:const\s+)?)?\b'
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
      r'(\([^)].*)', line)
  # Computed once and reused below to skip both the function-style and the
  # static_cast C-style checks.
  expecting_function = ExpectingFunctionArgs(clean_lines, linenum)
  if match and not expecting_function:
    matched_type = match.group(2)

    # matched_new_or_template is used to silence two false positives:
    # - New operators
    # - Template arguments with function types
    #
    # For template arguments, we match on types immediately following
    # an opening bracket without any spaces.  This is a fast way to
    # silence the common case where the function type is the first
    # template argument.  False negative with less-than comparison is
    # avoided because those operators are usually followed by a space.
    #
    #   function<double(double)>   // bracket + no space = false positive
    #   value < double(42)         // bracket + space = true positive
    matched_new_or_template = match.group(1)

    # Other things to ignore:
    # - Function pointers
    # - Casts to pointer types
    # - Placement new
    # - Alias declarations
    matched_funcptr = match.group(3)
    if (matched_new_or_template is None and
        not (matched_funcptr and
             (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
                    matched_funcptr) or
              matched_funcptr.startswith('(*)'))) and
        not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and
        not Search(r'new\(\S+\)\s*' + matched_type, line)):
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style.  '
            'Use static_cast<%s>(...) instead' %
            matched_type)

  if not expecting_function:
    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                    'static_cast',
                    r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases.  Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).
  #
  # The reinterpret_cast check runs only when the const_cast check returned
  # a false value, so a single cast is not reported twice.
  if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                     'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
    pass
  else:
    # Check pointer casts for other than string constants
    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  match = Search(
      r'(?:&\(([^)]+)\)[\w(])|'
      r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
  if match and match.group(1) != '*':
    # Try a better error message when the & is bound to something
    # dereferenced by the casted pointer, as opposed to the casted
    # pointer itself.
    parenthesis_error = False
    match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line)
    if match:
      # len(match.group(1)) is the index of the '<' that follows the cast
      # keyword.  Presumably CloseExpression returns the line number and
      # column just past the matching closer, with a negative column on
      # failure -- confirm against its definition.
      _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1)))
      if x1 >= 0 and clean_lines.elided[y1][x1] == '(':
        # The template argument list is followed by '(': close that too.
        _, y2, x2 = CloseExpression(clean_lines, y1, x1)
        if x2 >= 0:
          # Look at what follows the cast, including the next line when
          # there is one, for a '->' or '[' dereference.
          extended_line = clean_lines.elided[y2][x2:]
          if y2 < clean_lines.NumLines() - 1:
            extended_line += clean_lines.elided[y2 + 1]
          if Match(r'\s*(?:->|\[)', extended_line):
            parenthesis_error = True

    if parenthesis_error:
      error(filename, linenum, 'readability/casting', 4,
            ('Are you taking an address of something dereferenced '
             'from a cast?  Wrapping the dereferenced expression in '
             'parentheses will make the binding more obvious'))
    else:
      error(filename, linenum, 'runtime/casting', 4,
            ('Are you taking an address of a cast?  '
             'This is dangerous: could be a temp var.  '
             'Take the address before doing the cast, rather than after'))
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +00004952
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00004953
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Reports a C-style cast (or an unnamed parameter that resembles one).

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The C++ cast to recommend in the message: reinterpret_cast,
               static_cast, or const_cast.
    pattern: The regular expression used to find C-style casts. Its first
             group must capture the type inside the parentheses.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  cast = Search(pattern, line)
  if not cast:
    return False

  # Text in front of the opening parenthesis of the candidate cast.
  before_paren = line[0:cast.start(1) - 1]

  # Keywords such as sizeof(...) look like casts, and so do macro
  # invocations, which are generally troublesome; leave both alone.
  if Match(r'.*\b(?:sizeof|alignof|alignas|[A-Z_]+)\s*$', before_paren):
    return False

  # operator++(int) and operator--(int) are declarations, not casts.
  if before_paren.endswith((' operator++', ' operator--')):
    return False

  # A lone unnamed function argument is indistinguishable from an old style
  # cast at the regex level.  What follows the closing parenthesis tells the
  # two apart.
  #
  # Declarations we do want to warn about, because the style guide requires
  # every parameter to be named:
  #   Function(int);
  #   Function(int) {
  #   ConstMember(int) const;
  #   ConstMember(int) const {
  #   ExceptionMember(int) throw (...);
  #   ExceptionMember(int) throw (...) {
  #   PureVirtual(int) = 0;
  #
  # Function-like constructs where names are commonly omitted to reduce
  # clutter, which we stay silent about:
  #   (FunctionPointer)(int);
  #   (FunctionPointer)(int) = value;
  #   Function((function_pointer_arg)(int))
  #   <TemplateArgument(int)>;
  #   <(FunctionPointerTemplateArgument)(int)>;
  remainder = line[cast.end(0):]
  looks_like_parameter = Match(
      r'^\s*(?:;|const\b|throw\b|final\b|override\b|=|>|\{|\))', remainder)

  if not looks_like_parameter:
    # Nothing suggests a declaration, so this is an actual cast.
    error(filename, linenum, 'readability/casting', 4,
          'Using C-style cast. Use %s<%s>(...) instead' %
          (cast_type, cast.group(1)))
    return True

  # Never warn on any kind of template argument.
  if Match(r'^\s*>', remainder):
    return False

  # Assignments to function pointers are fine, but "= 0" is kept so that
  # unnamed parameters of pure virtual functions are still reported.  This
  # also lets "0" assigned to a function pointer through, but "nullptr" or
  # "NULL" would be the preferred spelling there anyway.
  assigned = Match(r'^\s=\s*(\S+)\s*;', remainder)
  if assigned and assigned.group(1) != '0':
    return False

  # A ")" right before "(type)" indicates a function pointer declaration.
  if Match(r'.*\)\s*$', line[0:cast.start(0)]):
    return False

  # The parameter may be named inside a block comment, e.g.:
  #   Function(int /*unused_param*/);
  if '/*' in raw_line:
    return False

  # Every filter passed: this is an unnamed parameter.
  error(filename, linenum, 'readability/function', 3,
        'All parameters should be named in a function')
  return True
5048
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005049
def ExpectingFunctionArgs(clean_lines, linenum):
  """Tells whether function type arguments are expected on this line.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.

  Returns:
    A true value if the line at 'linenum' is inside something that expects
    arguments of function types (a MOCK_METHOD-style macro, or the template
    argument list of std::function / std::mfunction opened on the previous
    line); a false value otherwise.
  """
  elided = clean_lines.elided

  # The macro invocation starts on this very line.
  on_this_line = Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', elided[linenum])
  if on_this_line:
    return on_this_line

  # The remaining checks look at the two preceding lines.
  if linenum < 2:
    return False

  previous = elided[linenum - 1]
  return (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
                previous) or
          Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
                elided[linenum - 2]) or
          Search(r'\bstd::m?function\s*\<\s*$', previous))
5070
5071
# Pairs of (header, templates declared by that header).  Used to build the
# regular expressions that drive the include-what-you-use check.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus', 'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function', 'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
)

# "string" is matched on its own because it is not a template.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# One (compiled pattern, function name, header) triple per <algorithm>
# function.  Each pattern matches max<type>(..., ...) and max(..., ...), but
# not foo->max, foo.max or type::max().
_re_pattern_algorithm_header = list(
    (re.compile(r'[^>.]\b' + _algorithm + r'(<.*?>)?\([^\)]'),
     _algorithm,
     '<algorithm>')
    for _algorithm in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                       'transform'))

# One (compiled pattern, "name<>", header) triple per known template.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _template_use = re.compile(r'(\<|\b)' + _template + r'\s*\<')
    _re_pattern_templates.append((_template_use, _template + '<>', _header))
5128
5129
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  The header path must line up with whole path components of the .cc path:
  'dir/foobar.cc' and 'bar.h' are not the same module, even though one name
  is a textual suffix of the other.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file ('' when
            the files are not in the same module).
  """
  if not filename_cc.endswith('.cc'):
    return (False, '')
  filename_cc = filename_cc[:-len('.cc')]
  if filename_cc.endswith('_unittest'):
    filename_cc = filename_cc[:-len('_unittest')]
  elif filename_cc.endswith('_test'):
    filename_cc = filename_cc[:-len('_test')]
  filename_cc = filename_cc.replace('/public/', '/')
  filename_cc = filename_cc.replace('/internal/', '/')

  if not filename_h.endswith('.h'):
    return (False, '')
  filename_h = filename_h[:-len('.h')]
  if filename_h.endswith('-inl'):
    filename_h = filename_h[:-len('-inl')]
  filename_h = filename_h.replace('/public/', '/')
  filename_h = filename_h.replace('/internal/', '/')

  if not filename_cc.endswith(filename_h):
    return (False, '')

  # Slice by explicit length: a negative-index slice would yield '' instead
  # of the whole string when filename_h is empty.
  common_path = filename_cc[:len(filename_cc) - len(filename_h)]

  # Reject matches that start in the middle of a path component, such as
  # '.../foobar' against 'bar'.  The prefix must be empty or end at a '/'
  # (or the header path itself must start with one).
  if (common_path and not common_path.endswith('/') and
      not filename_h.startswith('/')):
    return (False, '')

  return (True, common_path)
5183
5184
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.
        Only its open(filename, mode, encoding, errors) method is used, and
        the object it returns only needs to be iterable line by line.

  Returns:
    True if a header was successfully added. False otherwise.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False

  try:
    for linenum, line in enumerate(headerfile, 1):
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, linenum))
  finally:
    # Release the file handle.  An injected io factory may hand back an
    # object without close() (e.g. a plain list of lines), so look it up
    # defensively instead of assuming a real file.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
5212
5213
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance. It is copied, so the caller's
                   instance is not modified.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  required = {}  # A map of header name to linenumber and the template entity.
                 # Example of required: { '<functional>': (1219, 'less<>') }

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    matched = _RE_PATTERN_STRING.search(line)
    if matched:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      prefix = line[:matched.start()]
      if prefix.endswith('std::') or not prefix.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # Pure speed-up, no change in semantics: every template pattern needs a
    # '<', so lines without one cannot match any of them.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Here, we will look at possible includes.
  # Let's copy the include_state so it is only messed up within this function.
  include_state = include_state.copy()

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # Use the absolute path so that matching works properly.
  abs_filename = FileInfo(filename).FullName()

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore original file name here so that the corresponding header file can be
  # found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)

  # include_state is modified during iteration (UpdateIncludeState inserts
  # into it), so iterate over an explicit snapshot of the keys.  keys() alone
  # is only a copy on Python 2; on Python 3 it is a live view.
  header_keys = list(include_state.keys())
  for header in header_keys:
    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
    fullpath = common_path + header
    if same_module and UpdateIncludeState(fullpath, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped, (linenum, template) in required.items():
    if required_header_unstripped.strip('<>"') not in include_state:
      error(filename, linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
5305
5306
# Matches make_pair called with explicitly spelled template arguments.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Warns when make_pair is given explicit template arguments.

  G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  if not _RE_PATTERN_EXPLICIT_MAKEPAIR.search(clean_lines.elided[linenum]):
    return

  error(filename, linenum, 'build/explicit_make_pair',
        4,  # 4 = high confidence
        'For C++11-compatibility, omit template arguments from make_pair'
        ' OR use pair directly OR if appropriate, construct a pair directly')
def CheckDefaultLambdaCaptures(filename, clean_lines, linenum, error):
  """Warns about lambdas that use a default capture.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # A lambda introducer specifies a default capture if it starts with "[="
  # or if it starts with "[&" _not_ followed by an identifier.
  introducer = Match(r'^(.*)\[\s*(?:=|&[^\w])', clean_lines.elided[linenum])
  if not introducer:
    return

  # Potential error.  Find where the bracket opened right after group 1
  # closes, and look at what follows it: anything other than an open
  # parenthesis (lambda-declarator) or an open brace (compound-statement)
  # means this is not a lambda.
  closing_line, _, closing_pos = CloseExpression(
      clean_lines, linenum, len(introducer.group(1)))
  if closing_pos < 0:
    return

  if Match(r'^\s*[{(]', closing_line[closing_pos:]):
    error(filename, linenum, 'build/c++11',
          4,  # 4 = high confidence
          'Default lambda captures are an unapproved C++ feature.')
5352
5353
asvitkine@chromium.org8b8d8be2011-09-08 15:34:45 +00005354
5355
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=None):
  """Processes a single line in the file.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: A CleansedLines instance holding the lines of the file,
                 both raw and with comments stripped.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each
                           function takes 4 arguments: filename, clean_lines,
                           line, error. None (the default) means no extra
                           checks.
  """
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  # Inline assembly is not C++; none of the checks below apply to it.
  if nesting_state.InAsmBlock():
    return
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckVlogArguments(filename, clean_lines, line, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  CheckDefaultLambdaCaptures(filename, clean_lines, line, error)
  # The default is None rather than a shared mutable list; treat it (and any
  # empty sequence) as "no extra checks".
  for check_fn in extra_check_functions or ():
    check_fn(filename, clean_lines, line, error)
avakulenko@google.com17449932014-07-28 22:13:33 +00005396
def FlagCxx11Features(filename, clean_lines, linenum, error):
  """Reports C++11 headers and library names that are not approved for use.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  unapproved_headers = (
      'cfenv',
      'condition_variable',
      'fenv.h',
      'future',
      'mutex',
      'thread',
      'chrono',
      'ratio',
      'regex',
      'system_error',
  )

  # These are classes and free functions.  The classes are always
  # mentioned as std::*, but we only catch the free functions if
  # they're not found by ADL.  They're alphabetical by header.
  unapproved_names = (
      # type_traits
      'alignment_of',
      'aligned_union',

      # utility
      'forward',
  )

  line = clean_lines.elided[linenum]

  # Flag unapproved C++11 headers.
  include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line)
  if include:
    header = include.group(1)
    if header in unapproved_headers:
      error(filename, linenum, 'build/c++11', 5,
            ('<%s> is an unapproved C++11 header.') % header)

  # The only place where we need to worry about C++11 keywords and library
  # features in preprocessor directives is in macro definitions.
  is_directive = Match(r'\s*#', line)
  if is_directive and not Match(r'\s*#\s*define\b', line):
    return

  for top_name in unapproved_names:
    if not Search(r'\bstd::%s\b' % top_name, line):
      continue
    error(filename, linenum, 'build/c++11', 5,
          ('std::%s is an unapproved C++11 class or function. Send c-style '
           'an example of where it would make your code more readable, and '
           'they may let you use it.') % top_name)
5444
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005445
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
           The caller's list is not modified.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each
                           function takes 4 arguments: filename, clean_lines,
                           line, error. None (the default) means no extra
                           checks.
  """
  # Use None as the default instead of a shared mutable list, and hand the
  # per-line processing a real (empty) sequence.
  if extra_check_functions is None:
    extra_check_functions = []

  # Building a new list here leaves the caller's list untouched by the
  # in-place comment removal below.
  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
    FlagCxx11Features(filename, clean_lines, line, error)
  nesting_state.CheckCompletedBlocks(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForBadCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
5491
def ProcessConfigOverrides(filename):
  """Loads CPPLINT.cfg files for a file and applies their overrides.

  Starting in the directory of |filename| and walking towards the root, each
  CPPLINT.cfg found is read. Its "filter" values are collected, "set noparent"
  stops the walk, and "exclude_files" can veto processing of the file.

  Args:
    filename: The name of the file being processed by the linter.

  Returns:
    False if the current |filename| should not be processed further.
  """
  current_path = os.path.abspath(filename)
  pending_filters = []
  search_parents = True

  while search_parents:
    parent_dir, component = os.path.split(current_path)
    if not component:
      break  # Reached the root directory.

    cfg_file = os.path.join(parent_dir, "CPPLINT.cfg")
    current_path = parent_dir
    if not os.path.isfile(cfg_file):
      continue

    try:
      with open(cfg_file) as file_handle:
        for raw in file_handle:
          content = raw.partition('#')[0]  # Remove comments.
          if not content.strip():
            continue

          name, _, val = content.partition('=')
          name = name.strip()
          val = val.strip()
          if name == 'set noparent':
            search_parents = False
          elif name == 'filter':
            pending_filters.append(val)
          elif name == 'exclude_files':
            # The pattern is matched against the path component that sits
            # directly inside the directory holding this config file.
            # For example, if we are checking for lint errors in
            # /foo/bar/baz.cc and we found the .cfg file at
            # /foo/CPPLINT.cfg, then the "exclude_files" pattern is checked
            # against "bar" and not "baz" nor "bar/baz.cc".
            if re.compile(val).match(component):
              sys.stderr.write('Ignoring "%s": file excluded by "%s". '
                               'File path component "%s" matches '
                               'pattern "%s"\n' %
                               (filename, cfg_file, component, val))
              return False
          else:
            sys.stderr.write(
                'Invalid configuration option (%s) in file %s\n' %
                (name, cfg_file))

    except IOError:
      sys.stderr.write(
          "Skipping config file '%s': Can't open for reading\n" % cfg_file)
      search_parents = False

  # Apply all the accumulated filters in reverse order (top-level directory
  # config options having the least priority).
  for cfg_filter in reversed(pending_filters):
    _AddFilters(cfg_filter)

  return True
5560
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00005561
asvitkine@chromium.org8b8d8be2011-09-08 15:34:45 +00005562def ProcessFile(filename, vlevel, extra_check_functions=[]):
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005563 """Does google-lint on a single file.
5564
5565 Args:
5566 filename: The name of the file to parse.
5567
5568 vlevel: The level of errors to report. Every error of confidence
5569 >= verbose_level will be reported. 0 is a good default.
asvitkine@chromium.org8b8d8be2011-09-08 15:34:45 +00005570
5571 extra_check_functions: An array of additional check functions that will be
5572 run on each source line. Each function takes 4
5573 arguments: filename, clean_lines, line, error
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005574 """
5575
5576 _SetVerboseLevel(vlevel)
avakulenko@google.com17449932014-07-28 22:13:33 +00005577 _BackupFilters()
5578
5579 if not ProcessConfigOverrides(filename):
5580 _RestoreFilters()
5581 return
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005582
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00005583 lf_lines = []
5584 crlf_lines = []
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005585 try:
5586 # Support the UNIX convention of using "-" for stdin. Note that
5587 # we are not opening the file with universal newline support
5588 # (which codecs doesn't support anyway), so the resulting lines do
5589 # contain trailing '\r' characters if we are reading a file that
5590 # has CRLF endings.
5591 # If after the split a trailing '\r' is present, it is removed
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00005592 # below.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005593 if filename == '-':
5594 lines = codecs.StreamReaderWriter(sys.stdin,
5595 codecs.getreader('utf8'),
5596 codecs.getwriter('utf8'),
5597 'replace').read().split('\n')
5598 else:
5599 lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
5600
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005601 # Remove trailing '\r'.
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00005602 # The -1 accounts for the extra trailing blank line we get from split()
5603 for linenum in range(len(lines) - 1):
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005604 if lines[linenum].endswith('\r'):
5605 lines[linenum] = lines[linenum].rstrip('\r')
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00005606 crlf_lines.append(linenum + 1)
5607 else:
5608 lf_lines.append(linenum + 1)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005609
5610 except IOError:
5611 sys.stderr.write(
5612 "Skipping input '%s': Can't open for reading\n" % filename)
avakulenko@google.com17449932014-07-28 22:13:33 +00005613 _RestoreFilters()
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005614 return
5615
5616 # Note, if no dot is found, this will give the entire filename as the ext.
5617 file_extension = filename[filename.rfind('.') + 1:]
5618
5619 # When reading from stdin, the extension is unknown, so no cpplint tests
5620 # should rely on the extension.
raphael.kubo.da.costa@intel.com331fbc42014-05-09 08:48:20 +00005621 if filename != '-' and file_extension not in _valid_extensions:
5622 sys.stderr.write('Ignoring %s; not a valid file name '
5623 '(%s)\n' % (filename, ', '.join(_valid_extensions)))
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005624 else:
asvitkine@chromium.org8b8d8be2011-09-08 15:34:45 +00005625 ProcessFileData(filename, file_extension, lines, Error,
5626 extra_check_functions)
avakulenko@google.comd39bbb52014-06-04 22:55:20 +00005627
5628 # If end-of-line sequences are a mix of LF and CR-LF, issue
5629 # warnings on the lines with CR.
5630 #
5631 # Don't issue any warnings if all lines are uniformly LF or CR-LF,
5632 # since critique can handle these just fine, and the style guide
5633 # doesn't dictate a particular end of line sequence.
5634 #
5635 # We can't depend on os.linesep to determine what the desired
5636 # end-of-line sequence should be, since that will return the
5637 # server-side end-of-line sequence.
5638 if lf_lines and crlf_lines:
5639 # Warn on every line with CR. An alternative approach might be to
5640 # check whether the file is mostly CRLF or just LF, and warn on the
5641 # minority, we bias toward LF here since most tools prefer LF.
5642 for linenum in crlf_lines:
5643 Error(filename, linenum, 'whitespace/newline', 1,
5644 'Unexpected \\r (^M) found; better to use only \\n')
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005645
5646 sys.stderr.write('Done processing %s\n' % filename)
avakulenko@google.com17449932014-07-28 22:13:33 +00005647 _RestoreFilters()
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00005648
5649
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Args:
    message: Optional error description.  When it is truthy, sys.exit() is
      called with the message prefixed by 'FATAL ERROR: '; otherwise
      sys.exit(1) is called.
  """
  sys.stderr.write(_USAGE)
  # Guard clause: with no message there is nothing more to report.
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
5661
5662
def PrintCategories():
  """Lists every error category on stderr, one per line, then exits.

  The category names are the ones accepted by --filter.  The process is
  terminated with sys.exit(0) after the list has been written.
  """
  listing = [' %s\n' % category for category in _ERROR_CATEGORIES]
  sys.stderr.write(''.join(listing))
  sys.exit(0)
5670
5671
def ParseArguments(args):
  """Parses the command line arguments.

  This may set the output format, verbosity level, filters and counting
  style as side-effects.  --root, --linelength and --extensions overwrite
  the module-level _root, _line_length and _valid_extensions settings.

  Any invalid option or option value is reported through PrintUsage(), which
  terminates the program.  --help prints the usage text and an empty
  --filter= prints the category list; both exit as well.

  Args:
    args: The command line arguments, without the program name.

  Returns:
    The list of filenames to lint.
  """
  # Declared once up front rather than inside individual option branches.
  global _root
  global _line_length
  global _valid_extensions

  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter=',
                                                 'root=',
                                                 'linelength=',
                                                 'extensions='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  # Start from the current global state so unspecified options keep their
  # present values when they are written back below.
  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--verbose':
      # Report a non-numeric level as a usage error, the same way
      # --linelength does, instead of dying with a ValueError traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity should be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        # An empty --filter= is a request to list the known categories.
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val
    elif opt == '--linelength':
      try:
        _line_length = int(val)
      except ValueError:
        PrintUsage('Line length must be digits.')
    elif opt == '--extensions':
      # Splitting a string cannot fail, so no error handling is needed here.
      _valid_extensions = set(val.split(','))

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
5740
5741
def main():
  """Command-line entry point: lints each file named on the command line.

  Parses sys.argv, processes every resulting filename, prints the error
  counts and finally calls sys.exit() with a true value if any error was
  counted and a false value otherwise.
  """
  filenames = ParseArguments(sys.argv[1:])

  # Route stderr through a UTF-8 writer that substitutes replacement
  # characters, so printing non-ASCII text cannot abort the run.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(
      sys.stderr, utf8_reader, utf8_writer, 'replace')

  _cpplint_state.ResetErrorCounts()
  for path in filenames:
    ProcessFile(path, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)
5758
5759
# Run the linter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()