niklase@google.com | 47bdc46 | 2011-05-30 11:42:35 +0000 | [diff] [blame] | 1 | import string |
| 2 | |
# returns a list [success, updated_string] where updated_string has
# one less (the first) occurrence of match_string
def removefirstoccurance(remove_string, match_string):
    """Remove the first case-insensitive occurrence of match_string.

    Returns a two-element list [found, text]. When match_string does not
    occur, found is False and text is remove_string unchanged; otherwise
    found is True and text is remove_string with the matched span cut out.
    """
    needle = match_string.lower()
    start = remove_string.lower().find(needle)
    if start < 0:
        return [False, remove_string]
    # Keep what precedes the match and what follows it.
    end = start + len(needle)
    return [True, remove_string[:start] + remove_string[end:]]
| 15 | |
# returns a string with all occurrences of match_string removed
def removealloccurances(remove_string, match_string):
    """Return remove_string with every occurrence of match_string removed.

    Matching ignores case. The first occurrence is removed repeatedly until
    none is left, so an occurrence that only appears once the text around
    an earlier match has been joined together is removed as well.

    An empty match_string leaves remove_string unchanged. (An empty match
    is "found" at index 0 without shortening the string, so looping on it
    would never terminate.)
    """
    if not match_string:
        return remove_string
    needle = match_string.lower()
    result = remove_string
    while True:
        start = result.lower().find(needle)
        if start == -1:
            return result
        result = result[:start] + result[start + len(needle):]
| 22 | |
# removes an occurrence of match_string only if it's first in the string
# returns a list [success, new_string]
def removeprefix(remove_string, match_string):
    """Strip match_string from the front of remove_string, ignoring case.

    Returns a two-element list [removed, text]. removed is True only when
    remove_string begins with match_string; text is then remove_string
    without that leading span. Otherwise text is remove_string unchanged.
    """
    prefix = match_string.lower()
    if not remove_string.lower().startswith(prefix):
        return [False, remove_string]
    return [True, remove_string[len(prefix):]]
| 39 | |
# removes multiple occurrences of match_string as long as they are first in
# the string
def removeallprefix(remove_string, match_string):
    """Strip every leading repetition of match_string from remove_string.

    Matching ignores case. Stripping stops as soon as the remaining text no
    longer starts with match_string.

    An empty match_string leaves remove_string unchanged. (Every string
    starts with the empty string, so looping on it would never terminate.)
    """
    if not match_string:
        return remove_string
    prefix = match_string.lower()
    result = remove_string
    while result.lower().startswith(prefix):
        result = result[len(prefix):]
    return result
| 47 | |
| 48 | # returns true if extensionstring is a correct extension |
def isextension(extensionstring):
    """Return True if extensionstring looks like a file extension.

    A valid extension is at least two characters long, starts with '.' and
    contains no other '.'. The whole remainder after the leading dot is
    checked, including the last character, so '..' and '.c.' are rejected.
    """
    if len(extensionstring) < 2:
        return False
    if extensionstring[0] != '.':
        return False
    return '.' not in extensionstring[1:]
| 57 | |
# returns the index of the start of the last occurrence of match_string
def findlastoccurance(original_string, match_string):
    """Return the start index of the last occurrence of match_string.

    The comparison is case-sensitive. Returns -1 when match_string does not
    occur in original_string. The index is always relative to the start of
    original_string. As with str.rfind, an empty match_string is reported
    at len(original_string).
    """
    return original_string.rfind(match_string)
| 68 | |
| 69 | # changes extension from original_extension to new_extension |
def changeextension(original_string, original_extension, new_extension):
    """Swap original_extension in original_string for new_extension.

    original_string is returned untouched when either extension is rejected
    by isextension(), or when findlastoccurance() reports that
    original_extension is absent (-1). Otherwise the text before the
    reported index is kept and new_extension is appended; anything that
    followed that index is dropped.
    """
    extensions_valid = (isextension(original_extension) and
                        isextension(new_extension))
    if not extensions_valid:
        return original_string
    cut = findlastoccurance(original_string, original_extension)
    if cut == -1:
        return original_string
    return original_string[:cut] + new_extension
| 80 | |
| 81 | # wanted to do this with str.find however didnt seem to work so do it manually |
| 82 | # returns the index of the first capital letter |
def findfirstcapitalletter(original_string):
    """Return the index of the first capital letter, or -1 if there is none.

    A character counts as capital when lower-casing it changes it.
    """
    capital_positions = (position
                         for position, character in enumerate(original_string)
                         if character != character.lower())
    return next(capital_positions, -1)
| 88 | |
| 89 | |
# replaces capital letters with an underscore and the lower case letter (except
# the very first letter, which is only lower-cased)
def lowercasewithunderscore(original_string):
    """Convert a CamelCase name to lower_case_with_underscores.

    The first character is lower-cased without a leading underscore. Every
    later capital letter (a character that lower-casing changes) becomes
    '_' followed by its lower-case form. All other characters are copied
    unchanged.

    An empty string is returned as is. A one-character string is
    lower-cased like the first character of any longer string.
    """
    if not original_string:
        return original_string
    pieces = [original_string[0].lower()]
    for character in original_string[1:]:
        lowered = character.lower()
        if lowered != character:
            pieces.append('_')
        # For a non-capital character, lowered is the character itself.
        pieces.append(lowered)
    return ''.join(pieces)
| 106 | |
# my_table is a list of [old_string, new_string] pairs
def removeduplicates(my_table):
    """Return a new table holding each [old, new] pair of my_table once.

    Each pair is compared with the pairs already kept:
      * both strings equal: the pair is a duplicate and is skipped;
      * only the old strings are equal, or only the new strings are equal
        and the new string is not empty: the table is contradictory, so
        both pairs are printed and the program is terminated with quit();
      * different old strings that share an empty new string are accepted.
    """
    unique_rows = []
    for old_name, new_name in my_table:
        already_kept = False
        for kept_old, kept_new in unique_rows:
            same_old = (old_name == kept_old)
            same_new = (new_name == kept_new)
            if same_old and same_new:
                already_kept = True
                break
            if same_old or (same_new and new_name != ''):
                print('missmatching set, terminating program')
                print(old_name)
                print(new_name)
                print(kept_old)
                print(kept_new)
                quit()
        if not already_kept:
            unique_rows.append([old_name, new_name])
    return unique_rows
| 132 | |
def removenochange(my_table):
    """Return a new table without the pairs whose two strings are equal.

    Every kept pair is copied into a fresh two-element list.
    """
    return [[before, after] for before, after in my_table if before != after]
| 139 | |
def ordertablesizefirst(my_table):
    """Sort my_table in place by length of the first string, longest first.

    Replacing the longer strings first is useful because a shorter string
    can be contained in a longer one. E.g. GIPS is a substring of GIPSVE;
    if GIPS is replaced first, GIPSVE will never be found.

    Rows whose first strings have equal length keep their relative order.
    Rows with an empty first string are sorted last.

    Returns the same (now sorted) list object.
    """
    my_table.sort(key=lambda row: len(row[0]), reverse=True)
    return my_table
| 162 | |
# returns the index in the longer string at which the shorter string occurs
# (ignoring case), or -1 if it does not occur or either string is empty;
# assumes neither has whitespaces
def issubstring(string1, string2):
    """Locate the shorter of the two strings inside the longer one.

    The comparison ignores case. Returns the index in the longer string
    where the shorter one first occurs, or -1 when it does not occur or
    when either string is empty. When both have the same length, string2
    is looked up in string1.
    """
    if len(string1) == 0 or len(string2) == 0:
        return -1
    if len(string1) < len(string2):
        haystack, needle = string2, string1
    else:
        haystack, needle = string1, string2

    needle_lowered = needle.lower()
    width = len(needle)
    for start in range(len(haystack)):
        window = haystack[start:start + width]
        if window.lower() == needle_lowered:
            return start
    return -1
| 182 | |
| 183 | #not_part_of_word_table = [' ','(',')','{','}',':','\t','*','&','/','[',']','.',',','\n'] |
| 184 | #def ispartofword( char ): |
| 185 | # for item in not_part_of_word_table: |
| 186 | # if(char == item): |
| 187 | # return False |
| 188 | # return True |
| 189 | |
# must be numerical, _ or a character
def ispartofword(char):
    """Return True if char is a letter, a digit or an underscore."""
    # isalnum() is already true for everything isalpha() accepts.
    return char.isalnum() or char == '_'
| 199 | |
| 200 | # returns the index of the first letter in the word that the current_index |
| 201 | # is pointing to and the size of the word |
def getword(line, current_index):
    """Locate the word in line that contains position current_index.

    Trailing whitespace is stripped from line first. A word is a run of
    characters accepted by ispartofword().

    Returns [start_index, length] of that word. Returns [] when
    current_index is negative, lies beyond the stripped line, or points at
    a character that is not part of a word (a space, punctuation, ...).
    """
    if current_index < 0:
        return []
    line = line.rstrip()
    if current_index >= len(line):
        return []
    # A non-word character has no surrounding word. Bail out here instead
    # of reporting a start past the index with a negative length.
    if not ispartofword(line[current_index]):
        return []
    # Walk left while the preceding character still belongs to the word.
    start_pos = current_index
    while start_pos > 0 and ispartofword(line[start_pos - 1]):
        start_pos -= 1
    # Walk right to the first character that is not part of the word.
    end_pos = current_index + 1
    while end_pos < len(line) and ispartofword(line[end_pos]):
        end_pos += 1
    return [start_pos, end_pos - start_pos]
| 224 | |
| 225 | # my table is a tuple [string1,string2] complement_to_table is just a list |
| 226 | # of strings to compare to string1 |
def complement(my_table, complement_to_table):
    """Return the rows of my_table whose first string is not listed.

    my_table holds [string1, string2] rows and complement_to_table is a
    list of strings. A row is dropped when its string1 equals, ignoring
    case, any entry of complement_to_table. Kept rows are the original row
    objects, in their original order.
    """
    remaining = []
    for row in my_table:
        key = row[0].lower()
        listed = any(key == candidate.lower()
                     for candidate in complement_to_table)
        if not listed:
            remaining.append(row)
    return remaining
| 237 | |
def removestringfromhead(line, remove_string):
    """Strip every leading repetition of remove_string from line.

    The comparison is case-sensitive. The scan advances by the full length
    of remove_string after each match, so a multi-character remove_string
    is removed as a whole.

    An empty remove_string leaves line unchanged. Returns '' when line
    consists only of repetitions of remove_string.
    """
    if not remove_string:
        return line
    step = len(remove_string)
    start = 0
    while line.startswith(remove_string, start):
        start += step
    return line[start:]
| 243 | |
def removeccomment(line):
    """Return line cut off just before its first '//'.

    A line without '//' is returned unchanged.
    """
    code_part, _, _ = line.partition('//')
    return code_part
| 250 | |
def whitespacestoonespace(line):
    """Collapse each run of whitespace in line into a single space.

    Leading and trailing whitespace is dropped.
    """
    words = line.split()
    return ' '.join(words)
| 253 | |
def fixabbreviations(original_string):
    """Turn runs of capitals into capitalised words (GIPSVoice -> GipsVoice).

    The first character is copied as is. After that:
      * '_' and digits are copied and end the current run of capitals;
      * a capital that follows a capital is lower-cased;
      * a non-capital that follows a capital re-capitalises the character
        emitted just before it, because that character starts a new word.

    A character counts as capital when upper-casing leaves it unchanged.

    An empty string is returned unchanged.
    """
    if not original_string:
        return original_string
    first = original_string[0]
    new_string = first
    previous_was_capital = (first.upper() == first)
    for character in original_string[1:]:
        if character == '_' or character.isdigit():
            new_string += character
            previous_was_capital = False
            continue
        current_is_capital = (character.upper() == character)
        letter_to_add = character
        if previous_was_capital:
            if current_is_capital:
                letter_to_add = character.lower()
            else:
                # The previous letter turned out to begin a new word.
                new_string = new_string[:-1] + new_string[-1].upper()
        previous_was_capital = current_is_capital
        new_string += letter_to_add
    return new_string
| 280 | |
def replaceoccurances(old_string, replace_string, replace_with_string):
    """Replace every occurrence of replace_string in old_string.

    Occurrences are matched case-sensitively, left to right and without
    overlap; each is replaced by replace_with_string. The whole match is
    consumed, so no part of a multi-character replace_string is left in
    the output.

    An empty replace_string returns old_string unchanged.
    """
    if not replace_string:
        return old_string
    return old_string.replace(replace_string, replace_with_string)