Blame - tools/refactoring/stringmanipulation.py - webrtc.googlesource.com/src

blob: 0d9e0ff3a5e13be5fe7182ad1e49053c15227145 [file] [log] [blame]

niklase@google.com	47bdc46	2011-05-30 11:42:35 +0000	[diff] [blame]	1	import string
				2
				3	# returns a list [success, updated_string] where the updated string
				4	# has one less (the first) occurrence of the match string
				5	def removefirstoccurance( remove_string, match_string ):
				6	lowercase_string = remove_string.lower()
				7	lowercase_match_string = match_string.lower()
				8	lowest_index = lowercase_string.find(lowercase_match_string)
				9	if(lowest_index == -1):
				10	return [False,remove_string]
				11	past_match_index = lowest_index + len(lowercase_match_string)
				12	highest_index = len(remove_string)
				13	remove_string = remove_string[0:lowest_index] + remove_string[past_match_index: highest_index]
				14	return [True,remove_string]
				15
				16	# returns a string with all occurrences of match_string removed
				17	def removealloccurances( remove_string, match_string ):
				18	return_value = [True, remove_string]
				19	while(return_value[0]):
				20	return_value = removefirstoccurance(return_value[1],match_string)
				21	return return_value[1]
				22
				23	# removes an occurrence of match_string only if it's first in the string
				24	# returns a list [success, new_string]
				25	def removeprefix( remove_string, match_string ):
				26	lowercase_string = remove_string.lower()
				27	lowercase_match_string = match_string.lower()
				28	lowest_index = lowercase_string.find(lowercase_match_string)
				29	if(lowest_index == -1):
				30	return [False,remove_string]
				31	if(lowest_index != 0):
				32	return [False,remove_string]
				33	past_match_index = lowest_index + len(lowercase_match_string)
				34	highest_index = len(remove_string)
				35	remove_string = remove_string[0:lowest_index] + remove_string[past_match_index: highest_index]
				36	# print lowest_index
				37	# print past_match_index
				38	return [True,remove_string]
				39
				40	# removes repeated occurrences of match_string as long as they are first in
				41	# the string
				42	def removeallprefix( remove_string, match_string ):
				43	return_value = [True, remove_string]
				44	while(return_value[0]):
				45	return_value = removeprefix(return_value[1],match_string)
				46	return return_value[1]
				47
				48	# returns true if extensionstring is a correct extension
				49	def isextension( extensionstring ):
				50	if(len(extensionstring) < 2):
				51	return False
				52	if(extensionstring[0] != '.'):
				53	return False
				54	if(extensionstring[1:len(extensionstring)-1].find('.') != -1):
				55	return False
				56	return True
				57
				58	# returns the index of the start of the last occurrence of match_string
				59	def findlastoccurance( original_string, match_string ):
				60	search_index = original_string.find(match_string)
				61	found_index = search_index
				62	last_index = len(original_string) - 1
				63	while((search_index != -1) and (search_index < last_index)):
				64	search_index = original_string[search_index+1:last_index].find(match_string)
				65	if(search_index != -1):
				66	found_index = search_index
				67	return found_index
				68
				69	# changes extension from original_extension to new_extension
				70	def changeextension( original_string, original_extension, new_extension):
				71	if(not isextension(original_extension)):
				72	return original_string
				73	if(not isextension(new_extension)):
				74	return original_string
				75	index = findlastoccurance(original_string, original_extension)
				76	if(index == -1):
				77	return original_string
				78	return_value = original_string[0:index] + new_extension
				79	return return_value
				80
				81	# wanted to do this with str.find, however that didn't seem to work, so do it manually
				82	# returns the index of the first capital letter
				83	def findfirstcapitalletter( original_string ):
				84	for index in range(len(original_string)):
				85	if(original_string[index].lower() != original_string[index]):
				86	return index
				87	return -1
				88
				89
				90	# replaces capital letters with underscore and lower case letter (except the
				91	# very first letter, which gets no underscore)
				92	def lowercasewithunderscore( original_string ):
				93	# ignore the first letter since there should be no underscore in front of it
				94	if(len(original_string) < 2):
				95	return original_string
				96	return_value = original_string[1:len(original_string)]
				97	index = findfirstcapitalletter(return_value)
				98	while(index != -1):
				99	return_value = return_value[0:index] + \
				100	'_' + \
				101	return_value[index].lower() + \
				102	return_value[index+1:len(return_value)]
				103	index = findfirstcapitalletter(return_value)
				104	return_value = original_string[0].lower() + return_value
				105	return return_value
				106
				107	# my_table is a list of [old_string, new_string] pairs
				108	def removeduplicates( my_table ):
				109	new_table = []
				110	for old_string1, new_string1 in my_table:
				111	found = 0
				112	for old_string2, new_string2 in new_table:
				113	if(old_string1 == old_string2):
				114	found += 1
				115	if(new_string1 == new_string2):
				116	if(new_string1 == ''):
				117	found += found
				118	else:
				119	found += 1
				120	if(found == 1):
				121	print 'missmatching set, terminating program'
				122	print old_string1
				123	print new_string1
				124	print old_string2
				125	print new_string2
				126	quit()
				127	if(found == 2):
				128	break
				129	if(found == 0):
				130	new_table.append([old_string1,new_string1])
				131	return new_table
				132
				133	def removenochange( my_table ):
				134	new_table = []
				135	for old_string, new_string in my_table:
				136	if(old_string != new_string):
				137	new_table.append([old_string,new_string])
				138	return new_table
				139
				140	# order table after size of the string (can be used to replace bigger strings
				141	# first which is useful since smaller strings can be inside the bigger string)
				142	# E.g. GIPS is a sub string of GIPSVE if we remove GIPS first GIPSVE will never
				143	# be removed. N is small so no need for fancy sort algorithm. Use selection sort
				144	def ordertablesizefirst( my_table ):
				145	for current_index in range(len(my_table)):
				146	biggest_string = 0
				147	biggest_string_index = -1
				148	for search_index in range(len(my_table)):
				149	if(search_index < current_index):
				150	continue
				151	length_of_string = len(my_table[search_index][0])
				152	if(length_of_string > biggest_string):
				153	biggest_string = length_of_string
				154	biggest_string_index = search_index
				155	if(biggest_string_index == -1):
				156	print 'sorting algorithm failed, program exit'
				157	quit()
				158	old_value = my_table[current_index]
				159	my_table[current_index] = my_table[biggest_string_index]
				160	my_table[biggest_string_index] = old_value
				161	return my_table
				162
				163	# returns the index at which the shorter string starts inside the longer
				164	# one (ignoring case), or -1 if it does not occur or either string is empty
				165	def issubstring( string1, string2 ):
				166	if(len(string1) == 0):
				167	return -1
				168	if(len(string2) == 0):
				169	return -1
				170	large_string = string1
				171	small_string = string2
				172	if(len(string1) < len(string2)):
				173	large_string = string2
				174	small_string = string1
				175
				176	for index in range(len(large_string)):
				177	large_sub_string = large_string[index:index+len(small_string)].lower()
				178	if(large_sub_string ==\
				179	small_string.lower()):
				180	return index
				181	return -1
				182
				183	#not_part_of_word_table = [' ','(',')','{','}',':','\t','*','&','/','[',']','.',',','\n']
				184	#def ispartofword( char ):
				185	# for item in not_part_of_word_table:
				186	# if(char == item):
				187	# return False
				188	# return True
				189
				190	# must be a digit, an underscore or a letter
				191	def ispartofword( char ):
				192	if(char.isalpha()):
				193	return True
				194	if(char.isalnum()):
				195	return True
				196	if(char == '_'):
				197	return True
				198	return False
				199
				200	# returns the index of the first letter in the word that the current_index
				201	# is pointing to and the size of the word
				202	def getword( line, current_index):
				203	if(current_index < 0):
				204	return []
				205	line = line.rstrip()
				206	if(len(line) <= current_index):
				207	return []
				208	if(line[current_index] == ' '):
				209	return []
				210	start_pos = current_index
				211	while start_pos >= 0:
				212	if(not ispartofword(line[start_pos])):
				213	start_pos += 1
				214	break
				215	start_pos -= 1
				216	if(start_pos == -1):
				217	start_pos = 0
				218	end_pos = current_index
				219	while end_pos < len(line):
				220	if(not ispartofword(line[end_pos])):
				221	break
				222	end_pos += 1
				223	return [start_pos,end_pos - start_pos]
				224
				225	# my table is a tuple [string1,string2] complement_to_table is just a list
				226	# of strings to compare to string1
				227	def complement( my_table, complement_to_table ):
				228	new_table = []
				229	for index in range(len(my_table)):
				230	found = False;
				231	for compare_string in complement_to_table:
				232	if(my_table[index][0].lower() == compare_string.lower()):
				233	found = True
				234	if(not found):
				235	new_table.append(my_table[index])
				236	return new_table
				237
				238	def removestringfromhead( line, remove_string):
				239	for index in range(len(line)):
				240	if(line[index:index+len(remove_string)] != remove_string):
				241	return line[index:index+len(line)]
				242	return ''
				243
				244	def removeccomment( line ):
				245	comment_string = '//'
				246	for index in range(len(line)):
				247	if(line[index:index+len(comment_string)] == comment_string):
				248	return line[0:index]
				249	return line
				250
				251	def whitespacestoonespace( line ):
				252	return ' '.join(line.split())
				253
				254	def fixabbreviations( original_string ):
				255	previouswascapital = (original_string[0].upper() == original_string[0])
				256	new_string = ''
				257	for index in range(len(original_string)):
				258	if(index == 0):
				259	new_string += original_string[index]
				260	continue
				261	if(original_string[index] == '_'):
				262	new_string += original_string[index]
				263	previouswascapital = False
				264	continue
				265	if(original_string[index].isdigit()):
				266	new_string += original_string[index]
				267	previouswascapital = False
				268	continue
				269	currentiscapital = (original_string[index].upper() == original_string[index])
				270	letter_to_add = original_string[index]
				271	if(previouswascapital and currentiscapital):
				272	letter_to_add = letter_to_add.lower()
				273	if(previouswascapital and (not currentiscapital)):
				274	old_letter = new_string[len(new_string)-1]
				275	new_string = new_string[0:len(new_string)-1]
				276	new_string += old_letter.upper()
				277	previouswascapital = currentiscapital
				278	new_string += letter_to_add
				279	return new_string
				280
				281	def replaceoccurances(old_string, replace_string, replace_with_string):
				282	if (len(replace_string) == 0):
				283	return old_string
				284	if (len(old_string) < len(replace_string)):
				285	return old_string
				286	# Simple implementation, could proably be done smarter
				287	new_string = ''
				288	for index in range(len(old_string)):
				289	#print new_string
				290	if(len(replace_string) > (len(old_string) - index)):
				291	new_string += old_string[index:index + len(old_string)]
				292	break
				293	match = (len(replace_string) > 0)
				294	for replace_index in range(len(replace_string)):
				295	if (replace_string[replace_index] != old_string[index + replace_index]):
				296	match = False
				297	break
				298	if (match):
				299	new_string += replace_with_string
				300	index =+ len(replace_string)
				301	else:
				302	new_string += old_string[index]
				303	return new_string