Blame - utils/reindent.py - chromium.googlesource.com/chromiumos/third_party/labpack

blob: ae53e0cccf3847fde1af9369ffb9c8bc10e4246b [file] [log] [blame]

mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
				3	# Released to the public domain, by Tim Peters, 03 October 2000.
				4
				5	"""reindent [-d][-r][-v] [ path ... ]
				6
				7	-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
				8	-r (--recurse) Recurse. Search for all .py files in subdirectories too.
				9	-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
				10	-v (--verbose) Verbose. Print informative msgs; else no output.
				11	-h (--help) Help. Print this usage information and exit.
				12
				13	Change Python (.py) files to use 4-space indents and no hard tab characters.
				14	Also trim excess spaces and tabs from ends of lines, and remove empty lines
				15	at the end of files. Also ensure the last line ends with a newline.
				16
				17	If no paths are given on the command line, reindent operates as a filter,
				18	reading a single source file from standard input and writing the transformed
				19	source to standard output. In this case, the -d, -r and -v flags are
				20	ignored.
				21
				22	You can pass one or more file and/or directory paths. When a directory
				23	path, all .py files within the directory will be examined, and, if the -r
				24	option is given, likewise recursively for subdirectories.
				25
				26	If output is not to standard output, reindent overwrites files in place,
				27	renaming the originals with a .bak extension. If it finds nothing to
				28	change, the file is left alone. If reindent does change a file, the changed
				29	file is a fixed-point for future runs (i.e., running reindent on the
				30	resulting .py file won't change it again).
				31
				32	The hard part of reindenting is figuring out what to do with comment
				33	lines. So long as the input files get a clean bill of health from
				34	tabnanny.py, reindent should do a good job.
				35
				36	The backup file is a copy of the one that is being reindented. The ".bak"
				37	file is generated with shutil.copy(), but some corner cases regarding
				38	user/group and permissions could leave the backup file more readable than
				39	you'd prefer. You can always use the --nobackup option to prevent this.
				40	"""
				41
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	42	from __future__ import absolute_import
				43	from __future__ import division
				44	from __future__ import print_function
				45
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	46	__version__ = "1"
				47
				48	import tokenize
				49	import os, shutil
				50	import sys
				51
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	52	from six.moves import range
				53
# Command-line option state.  main() updates these through a ``global``
# statement; check() reads them.  The three counters start at zero and are
# incremented once per occurrence of the matching flag.
verbose = recurse = dryrun = 0
# Cleared by -n/--nobackup; when true, check() copies the file to ".bak"
# before rewriting it.
makebackup = True
				58
				59	def usage(msg=None):
				60	if msg is not None:
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	61	print(msg, file=sys.stderr)
				62	print(__doc__, file=sys.stderr)
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	63
				64	def errprint(*args):
				65	sep = ""
				66	for arg in args:
				67	sys.stderr.write(sep + str(arg))
				68	sep = " "
				69	sys.stderr.write("\n")
				70
				71	def main():
				72	import getopt
				73	global verbose, recurse, dryrun, makebackup
				74	try:
				75	opts, args = getopt.getopt(sys.argv[1:], "drnvh",
				76	["dryrun", "recurse", "nobackup", "verbose", "help"])
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	77	except getopt.error as msg:
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	78	usage(msg)
				79	return
				80	for o, a in opts:
				81	if o in ('-d', '--dryrun'):
				82	dryrun += 1
				83	elif o in ('-r', '--recurse'):
				84	recurse += 1
				85	elif o in ('-n', '--nobackup'):
				86	makebackup = False
				87	elif o in ('-v', '--verbose'):
				88	verbose += 1
				89	elif o in ('-h', '--help'):
				90	usage()
				91	return
				92	if not args:
				93	r = Reindenter(sys.stdin)
				94	r.run()
				95	r.write(sys.stdout)
				96	return
				97	for arg in args:
				98	check(arg)
				99
				100	def check(file):
				101	if os.path.isdir(file) and not os.path.islink(file):
				102	if verbose:
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	103	print("listing directory", file)
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	104	names = os.listdir(file)
				105	for name in names:
				106	fullname = os.path.join(file, name)
				107	if ((recurse and os.path.isdir(fullname) and
				108	not os.path.islink(fullname))
				109	or name.lower().endswith(".py")):
				110	check(fullname)
				111	return
				112
				113	if verbose:
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	114	print("checking", file, "...", end=' ')
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	115	try:
				116	f = open(file)
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	117	except IOError as msg:
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	118	errprint("%s: I/O Error: %s" % (file, str(msg)))
				119	return
				120
				121	r = Reindenter(f)
				122	f.close()
				123	if r.run():
				124	if verbose:
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	125	print("changed.")
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	126	if dryrun:
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	127	print("But this is a dry run, so leaving it alone.")
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	128	if not dryrun:
				129	bak = file + ".bak"
				130	if makebackup:
				131	shutil.copyfile(file, bak)
				132	if verbose:
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	133	print("backed up", file, "to", bak)
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	134	f = open(file, "w")
				135	r.write(f)
				136	f.close()
				137	if verbose:
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	138	print("wrote new", file)
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	139	return True
				140	else:
				141	if verbose:
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	142	print("unchanged.")
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	143	return False
				144
				145	def _rstrip(line, JUNK='\n \t'):
				146	"""Return line stripped of trailing spaces, tabs, newlines.
				147
				148	Note that line.rstrip() instead also strips sundry control characters,
				149	but at least one known Emacs user expects to keep junk like that, not
				150	mentioning Barry by name or anything <wink>.
				151	"""
				152
				153	i = len(line)
				154	while i > 0 and line[i-1] in JUNK:
				155	i -= 1
				156	return line[:i]
				157
				158	class Reindenter:
				159
				160	def __init__(self, f):
				161	self.find_stmt = 1 # next token begins a fresh stmt?
				162	self.level = 0 # current indent level
				163
				164	# Raw file lines.
				165	self.raw = f.readlines()
				166
				167	# File lines, rstripped & tab-expanded. Dummy at start is so
				168	# that we can use tokenize's 1-based line numbering easily.
				169	# Note that a line is all-blank iff it's "\n".
				170	self.lines = [_rstrip(line).expandtabs() + "\n"
				171	for line in self.raw]
				172	self.lines.insert(0, None)
				173	self.index = 1 # index into self.lines of next line
				174
				175	# List of (lineno, indentlevel) pairs, one for each stmt and
				176	# comment line. indentlevel is -1 for comment lines, as a
				177	# signal that tokenize doesn't know what to do about them;
				178	# indeed, they're our headache!
				179	self.stats = []
				180
				181	def run(self):
				182	tokenize.tokenize(self.getline, self.tokeneater)
				183	# Remove trailing empty lines.
				184	lines = self.lines
				185	while lines and lines[-1] == "\n":
				186	lines.pop()
				187	# Sentinel.
				188	stats = self.stats
				189	stats.append((len(lines), 0))
				190	# Map count of leading spaces to # we want.
				191	have2want = {}
				192	# Program after transformation.
				193	after = self.after = []
				194	# Copy over initial empty lines -- there's nothing to do until
				195	# we see a line with something on it.
				196	i = stats[0][0]
				197	after.extend(lines[1:i])
				198	for i in range(len(stats)-1):
				199	thisstmt, thislevel = stats[i]
				200	nextstmt = stats[i+1][0]
				201	have = getlspace(lines[thisstmt])
				202	want = thislevel * 4
				203	if want < 0:
				204	# A comment line.
				205	if have:
				206	# An indented comment line. If we saw the same
				207	# indentation before, reuse what it most recently
				208	# mapped to.
				209	want = have2want.get(have, -1)
				210	if want < 0:
				211	# Then it probably belongs to the next real stmt.
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	212	for j in range(i+1, len(stats)-1):
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	213	jline, jlevel = stats[j]
				214	if jlevel >= 0:
				215	if have == getlspace(lines[jline]):
				216	want = jlevel * 4
				217	break
				218	if want < 0: # Maybe it's a hanging
				219	# comment like this one,
				220	# in which case we should shift it like its base
				221	# line got shifted.
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	222	for j in range(i-1, -1, -1):
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	223	jline, jlevel = stats[j]
				224	if jlevel >= 0:
				225	want = have + getlspace(after[jline-1]) - \
				226	getlspace(lines[jline])
				227	break
				228	if want < 0:
				229	# Still no luck -- leave it alone.
				230	want = have
				231	else:
				232	want = 0
				233	assert want >= 0
				234	have2want[have] = want
				235	diff = want - have
				236	if diff == 0 or have == 0:
				237	after.extend(lines[thisstmt:nextstmt])
				238	else:
				239	for line in lines[thisstmt:nextstmt]:
				240	if diff > 0:
				241	if line == "\n":
				242	after.append(line)
				243	else:
				244	after.append(" " * diff + line)
				245	else:
				246	remove = min(getlspace(line), -diff)
				247	after.append(line[remove:])
				248	return self.raw != self.after
				249
				250	def write(self, f):
				251	f.writelines(self.after)
				252
				253	# Line-getter for tokenize.
				254	def getline(self):
				255	if self.index >= len(self.lines):
				256	line = ""
				257	else:
				258	line = self.lines[self.index]
				259	self.index += 1
				260	return line
				261
				262	# Line-eater for tokenize.
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	263	def tokeneater(self, type, token, sline_scol, end, line,
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	264	INDENT=tokenize.INDENT,
				265	DEDENT=tokenize.DEDENT,
				266	NEWLINE=tokenize.NEWLINE,
				267	COMMENT=tokenize.COMMENT,
				268	NL=tokenize.NL):
				269
Derek Beckett	db73511	2020-08-27 10:25:15 -0700	[diff] [blame^]	270	(sline, scol) = sline_scol
mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	271	if type == NEWLINE:
				272	# A program statement, or ENDMARKER, will eventually follow,
				273	# after some (possibly empty) run of tokens of the form
				274	# (NL \| COMMENT)* (INDENT \| DEDENT+)?
				275	self.find_stmt = 1
				276
				277	elif type == INDENT:
				278	self.find_stmt = 1
				279	self.level += 1
				280
				281	elif type == DEDENT:
				282	self.find_stmt = 1
				283	self.level -= 1
				284
				285	elif type == COMMENT:
				286	if self.find_stmt:
				287	self.stats.append((sline, -1))
				288	# but we're still looking for a new stmt, so leave
				289	# find_stmt alone
				290
				291	elif type == NL:
				292	pass
				293
				294	elif self.find_stmt:
				295	# This is the first "real token" following a NEWLINE, so it
				296	# must be the first token of the next program statement, or an
				297	# ENDMARKER.
				298	self.find_stmt = 0
				299	if line: # not endmarker
				300	self.stats.append((sline, self.level))
				301
				302	# Count number of leading blanks.
				303	def getlspace(line):
				304	i, n = 0, len(line)
				305	while i < n and line[i] == " ":
				306	i += 1
				307	return i
				308
				309	if __name__ == '__main__':
				310	main()