blob: 58a83ce9dac52d1d3c399a132dccb49aa7f4e27c [file] [log] [blame]
"""A set of helper functions for parsing text."""
import re
NEWLINE = '\n'
# Matches a run of one or more whitespace characters. Callers apply it with
# .match(), which anchors at the start of the string, so it effectively
# captures *leading* whitespace.
WHITESPACE_RE = re.compile(r'\s+')


def remove_empty_lines(text):
    """Removes empty lines from text (preserving trailing whitespace).

    A line counts as empty when it has no characters at all or consists
    solely of whitespace. Lines that are kept are returned untouched, so
    their leading and trailing whitespace is preserved.

    Args:
        text(string): Text to remove the empty lines from.

    Returns:
        The non-empty lines of the text, joined by newlines.
    """
    # str.strip() yields '' (falsy) for zero-length and whitespace-only lines;
    # it is only used as the test, the line itself is kept unmodified.
    return NEWLINE.join(line for line in text.split(NEWLINE) if line.strip())


def whitespace(text):
    """Splits the leading whitespace off a string.

    Args:
        text(string): Text to remove whitespace from.

    Returns:
        A tuple containing the leading whitespace and the remaining text.
        The first element is '' when the text does not start with whitespace.
    """
    match = WHITESPACE_RE.match(text)
    if match:
        return (match.group(), text[match.end():])
    return ('', text)
def until(text, suffixes):
    """Splits text at the first suffix or 'end token'.

    The earliest occurrence of any suffix in the text wins. If several
    suffixes match at that same position, the one listed first in
    `suffixes` is used (not necessarily the longest).

    Args:
        text(string): The text to split.
        suffixes(List[string]): a list of strings that mark the end of a block.
            Empty strings are ignored, since they cannot delimit anything.

    Returns:
        A tuple containing the text before the end token, the end token and any
        remaining, unprocessed, text. If no suffix is found, the whole text is
        returned as the first element and the other two are empty strings.
    """
    # An empty suffix (or an empty suffix list) would build a pattern that
    # matches the empty string at position 0, returning ('', '', text) and
    # leaving a caller that loops on the remainder stuck forever.
    end_tokens = [suffix for suffix in suffixes if suffix]
    if end_tokens:
        # Convert the search for each suffix into a single regex; the suffixes
        # are escaped so they are matched literally.
        pattern = '|'.join(map(re.escape, end_tokens))
        match = re.search(pattern, text)
        if match:
            # If we find a suffix, split the text around it.
            return text[:match.start()], match.group(), text[match.end():]
    # If no suffix is found, consume the whole string.
    return text, '', ''