# blob: 58a83ce9dac52d1d3c399a132dccb49aa7f4e27c
# blame: Josh Pratt, f4dee35, 2018-07-30 17:04:49 +1000
"""A set of helper functions for parsing text."""
2
3import re
4
# Line separator used when splitting text into lines and joining it back.
NEWLINE = '\n'
# Matches a run of one or more whitespace characters. The class must be
# `\s` (whitespace); `\w` would match word characters (letters, digits,
# underscore) instead.
WHITESPACE_RE = re.compile(r'\s+')
7
8
def remove_empty_lines(text):
    """Removes empty lines from text.

    A line counts as empty when it contains no characters or consists
    solely of whitespace. Lines that are kept are returned unchanged, so
    their leading and trailing whitespace is preserved.

    Args:
        text(string): Text to remove empty lines from.
    Returns:
        The non-empty lines of the text, joined by newlines.
    """
    # str.strip() returns '' (falsy) for both empty and whitespace-only
    # lines, so only lines with visible content survive the filter.
    return NEWLINE.join(
        line for line in text.split(NEWLINE) if line.strip())
16
17
def whitespace(text):
    """Splits the leading whitespace off a string.

    Args:
        text(string): Text to remove whitespace from.
    Returns:
        A tuple containing the leading whitespace and the remaining text.
        The leading whitespace is '' when the text does not start with
        whitespace; the remaining text is '' when the text is empty or
        consists only of whitespace.
    """
    # lstrip() drops exactly the leading whitespace run, so the length
    # difference tells us where that run ends in the original string.
    remainder = text.lstrip()
    leading = text[:len(text) - len(remainder)]
    return (leading, remainder)
30
31
def until(text, suffixes):
    """Splits text at the first suffix or 'end token'.

    The earliest occurrence of any suffix wins. When several suffixes match
    at that same position, the one listed first in `suffixes` is used.

    Args:
        text(string): a string to split.
        suffixes(List[string]): a list of strings that mark the end of a
            block. Empty strings are ignored, since they would match at
            position 0 without consuming anything.
    Returns:
        A tuple containing the text before the end token, the end token and
        any remaining, unprocessed, text. If no suffix is found (or there
        are no usable suffixes), the whole text is returned as the first
        element and the other two elements are ''.
    """
    # Escape each suffix so it is matched literally, and skip empty ones:
    # an empty alternative would match the empty string at position 0.
    alternatives = [re.escape(suffix) for suffix in suffixes if suffix]

    # With nothing to search for there is no end token; an empty pattern
    # would otherwise "match" immediately and yield ('', '', text).
    if not alternatives:
        return text, '', ''

    # Convert the search for each suffix into a single regex.
    match = re.search('|'.join(alternatives), text)
    if match is None:
        # If no suffix is found, consume the whole string.
        return text, '', ''

    # If we find a suffix, split the text around it.
    return text[:match.start()], match.group(), text[match.end():]