blob: ae53e0cccf3847fde1af9369ffb9c8bc10e4246b [file] [log] [blame]
mbligh09a025e2008-06-06 20:29:49 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] [ path ... ]
6
7-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose) Verbose. Print informative msgs; else no output.
11-h (--help) Help. Print this usage information and exit.
12
13Change Python (.py) files to use 4-space indents and no hard tab characters.
14Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files. Also ensure the last line ends with a newline.
16
17If no paths are given on the command line, reindent operates as a filter,
18reading a single source file from standard input and writing the transformed
19source to standard output. In this case, the -d, -r and -v flags are
20ignored.
21
22You can pass one or more file and/or directory paths. When a directory
23path, all .py files within the directory will be examined, and, if the -r
24option is given, likewise recursively for subdirectories.
25
26If output is not to standard output, reindent overwrites files in place,
27renaming the originals with a .bak extension. If it finds nothing to
28change, the file is left alone. If reindent does change a file, the changed
29file is a fixed-point for future runs (i.e., running reindent on the
30resulting .py file won't change it again).
31
32The hard part of reindenting is figuring out what to do with comment
33lines. So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
35
36The backup file is a copy of the one that is being reindented. The ".bak"
37file is generated with shutil.copyfile(), but some corner cases regarding
38user/group and permissions could leave the backup file more readable than
39you'd prefer. You can always use the --nobackup option to prevent this.
40"""
41
Derek Beckettdb735112020-08-27 10:25:15 -070042from __future__ import absolute_import
43from __future__ import division
44from __future__ import print_function
45
mbligh09a025e2008-06-06 20:29:49 +000046__version__ = "1"
47
48import tokenize
49import os, shutil
50import sys
51
Derek Beckettdb735112020-08-27 10:25:15 -070052from six.moves import range
53
# Run-time settings.  main() overwrites these from the command-line flags
# and check() reads them.
verbose = 0        # -v / --verbose count; non-zero prints progress messages
recurse = 0        # -r / --recurse count; non-zero descends into subdirectories
dryrun = 0         # -d / --dryrun count; non-zero analyzes without rewriting
makebackup = True  # set to False by -n / --nobackup to skip the ".bak" copy
58
def usage(msg=None):
    """Write the usage text to standard error.

    Args:
        msg: Optional message (for example a getopt error).  When it is
            not None it is printed first, on its own line.

    The module docstring is always printed after the optional message.
    """
    if msg is not None:
        print(msg, file=sys.stderr)
    print(__doc__, file=sys.stderr)
mbligh09a025e2008-06-06 20:29:49 +000063
def errprint(*args):
    """Write the arguments to standard error on one line.

    Each argument is converted with str(); the pieces are separated by a
    single space and followed by a newline.  With no arguments only the
    newline is written.
    """
    sys.stderr.write(" ".join(str(arg) for arg in args) + "\n")
70
def main():
    """Command-line entry point: parse the options, then reindent the inputs.

    Options are read from sys.argv[1:] and stored in the module-level
    verbose, recurse, dryrun and makebackup settings.  With no path
    arguments the source is read from standard input and the transformed
    text is written to standard output; otherwise check() is called on
    each path in turn.

    Returns:
        2 if the command line could not be parsed (the usage text has then
        been written by usage()), otherwise None.  Either value can be
        handed to sys.exit().
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:
        usage(msg)
        # Distinguish a bad invocation from a successful run.
        return 2
    for o, _ in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout, nothing is touched on disk.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
99
def check(file):
    """Reindent one file, or the .py files found in a directory.

    For a directory that is not a symlink, every entry whose name ends in
    ".py" (case-insensitively) is checked, and, when the module-level
    recurse setting is set, so is every non-symlink subdirectory.

    For anything else the path is opened and run through Reindenter.  If
    the result differs and this is not a dry run, the file is rewritten in
    place, after first being copied to file + ".bak" when makebackup is
    true.  Progress messages are printed only when verbose is set.

    Args:
        file: Path of the file or directory to process.

    Returns:
        True if the file needed changes (it has been rewritten unless
        dryrun is set), False if it was already clean, and None for a
        directory or for a file that could not be opened.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # The constructor reads every line up front, so the handle can be
    # released immediately -- and is released even if reading fails.
    with f:
        r = Reindenter(f)

    if not r.run():
        if verbose:
            print("unchanged.")
        return False

    if verbose:
        print("changed.")
        if dryrun:
            print("But this is a dry run, so leaving it alone.")
    if not dryrun:
        bak = file + ".bak"
        if makebackup:
            shutil.copyfile(file, bak)
            if verbose:
                print("backed up", file, "to", bak)
        # Close the output file even if writing raises part-way through.
        with open(file, "w") as out:
            r.write(out)
        if verbose:
            print("wrote new", file)
    return True
144
def _rstrip(line, JUNK='\n \t'):
    """Return line stripped of trailing spaces, tabs, newlines.

    Note that line.rstrip() instead also strips sundry control characters,
    but at least one known Emacs user expects to keep junk like that, not
    mentioning Barry by name or anything <wink>.
    """
    # Passing the character set explicitly limits stripping to JUNK only.
    return line.rstrip(JUNK)


class Reindenter:
    """Rewrite one Python source so every indent level is four spaces.

    The constructor reads all lines from a file-like object.  run()
    tokenizes them and builds the transformed line list in self.after,
    and write() emits that list to another file-like object.
    """

    def __init__(self, f):
        """Read every line of f and set up the tokenizer bookkeeping.

        Args:
            f: Object with a readlines() method returning text lines.
        """
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Tokenize the source and compute the reindented lines.

        The result is stored in self.after.

        Returns:
            True if self.after differs from the raw input lines, i.e. the
            file needs rewriting; False otherwise.
        """
        # generate_tokens() accepts a readline callable that returns text
        # and yields 5-tuples, on Python 2 and Python 3 alike.  The
        # two-argument tokenize.tokenize(readline, tokeneater) form only
        # exists on Python 2.
        for token_info in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token_info)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                # Only the first real stmt is consulted.
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + getlspace(after[jline - 1])
                                        - getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Never add trailing whitespace to a blank line.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Don't eat more than the line's own indentation.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines computed by run() to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next processed line, or "" once they are exhausted."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, sline_scol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token and record where statements and comments start.

        Appends (lineno, level) to self.stats for the first token of each
        statement, and (lineno, -1) for each comment seen while a new
        statement is still being looked for.  self.level tracks the
        current depth via INDENT/DEDENT tokens.
        """
        (sline, scol) = sline_scol
        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))


def getlspace(line):
    """Return the number of leading space characters in line.

    Only " " counts; tabs and other whitespace end the run.
    """
    return len(line) - len(line.lstrip(" "))
308
if __name__ == '__main__':
    # Use main()'s return value as the process exit status; a return value
    # of None makes sys.exit() exit with status 0.
    sys.exit(main())