dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 1 | """ |
| 2 | Tables Extension for Python-Markdown |
| 3 | ==================================== |
| 4 | |
| 5 | Added parsing of tables to Python-Markdown. |
| 6 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 7 | See <https://Python-Markdown.github.io/extensions/tables> |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 8 | for documentation. |
| 9 | |
| 10 | Original code Copyright 2009 [Waylan Limberg](http://achinghead.com) |
| 11 | |
| 12 | All changes Copyright 2008-2014 The Python Markdown Project |
| 13 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 14 | License: [BSD](https://opensource.org/licenses/bsd-license.php) |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 15 | |
| 16 | """ |
| 17 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 18 | from . import Extension |
| 19 | from ..blockprocessors import BlockProcessor |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 20 | import xml.etree.ElementTree as etree |
| 21 | import re |
# Bit flags recording which outer border pipes the header row of a table has.
# They are OR-ed together into ``TableProcessor.border``.
PIPE_NONE = 0b00   # no leading or trailing pipe
PIPE_LEFT = 0b01   # row starts with a pipe
PIPE_RIGHT = 0b10  # row ends with an unescaped pipe
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 25 | |
| 26 | |
class TableProcessor(BlockProcessor):
    """ Process Tables.

    ``test`` decides whether a block is a table and, as a side effect,
    caches the header's border flags in ``self.border`` and the split
    separator row in ``self.separator``. ``run`` then consumes that cached
    state to build the ``table`` element, so ``run`` is only meaningful
    after a successful ``test`` on the same block.
    """

    # Tokens that matter when splitting a row into cells. Groups:
    #   1: an escaped backslash (``\\``)
    #   2: a backslash followed by one or more backticks
    #   3: one or more backticks
    #   4: an escaped pipe (``\|``)
    #   5: a bare pipe
    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
    # A pipe at the very end of the row that is not escaped, i.e. it is
    # preceded only by complete pairs of backslashes (or none at all).
    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')

    def __init__(self, parser):
        """ Initialise the cached border flags and separator row. """
        # ``border`` is a bitmask of PIPE_* flags, so start from PIPE_NONE
        # rather than a boolean.
        self.border = PIPE_NONE
        self.separator = ''
        super().__init__(parser)

    def test(self, parent, block):
        """
        Ensure first two rows (column header and separator row) are valid table rows.

        Keep border check and separator row to avoid repeating the work.

        Returns True when the block looks like a table, False otherwise.
        Blocks with fewer than two lines are rejected without touching the
        cached state.
        """
        rows = [row.strip(' ') for row in block.split('\n')]
        if len(rows) < 2:
            return False

        # Record which outer border pipes the header row has.
        header0 = rows[0]
        self.border = PIPE_NONE
        if header0.startswith('|'):
            self.border |= PIPE_LEFT
        if self.RE_END_BORDER.search(header0) is not None:
            self.border |= PIPE_RIGHT

        row0_len = len(self._split_row(header0))
        is_table = row0_len > 1

        # Each row in a single column table needs at least one pipe.
        if not is_table and row0_len == 1 and self.border:
            is_table = all(
                line.startswith('|') or self.RE_END_BORDER.search(line) is not None
                for line in rows[1:]
            )

        if is_table:
            # The separator row must have as many cells as the header and
            # consist only of pipes, colons, dashes and spaces.
            row = self._split_row(rows[1])
            is_table = (len(row) == row0_len) and set(''.join(row)) <= set('|:- ')
            if is_table:
                self.separator = row

        return is_table

    def run(self, parent, blocks):
        """ Parse a table block and build table.

        Pops the first block off ``blocks`` and appends a ``table`` element
        (with ``thead`` and ``tbody``) to ``parent``. Line 0 is the header,
        line 1 is the separator row (already split and cached by ``test``),
        and any remaining lines are body rows.
        """
        block = blocks.pop(0).split('\n')
        header = block[0].strip(' ')
        rows = block[2:]

        # Get alignment of columns
        align = []
        for c in self.separator:
            c = c.strip(' ')
            if c.startswith(':') and c.endswith(':'):
                align.append('center')
            elif c.startswith(':'):
                align.append('left')
            elif c.endswith(':'):
                align.append('right')
            else:
                align.append(None)

        # Build table
        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
        self._build_row(header, thead, align)
        tbody = etree.SubElement(table, 'tbody')
        if not rows:
            # Handle empty table
            self._build_empty_row(tbody, align)
        else:
            for row in rows:
                self._build_row(row.strip(' '), tbody, align)

    def _build_empty_row(self, parent, align):
        """Build an empty row: one ``tr`` holding an empty ``td`` per column."""
        tr = etree.SubElement(parent, 'tr')
        for _ in align:
            etree.SubElement(tr, 'td')

    def _build_row(self, row, parent, align):
        """ Given a row of text, build table cells.

        Cells are ``th`` when ``parent`` is a ``thead`` element and ``td``
        otherwise. ``align`` holds one entry per column: an alignment name
        to set as the ``align`` attribute, or None for no attribute.
        """
        tr = etree.SubElement(parent, 'tr')
        tag = 'th' if parent.tag == 'thead' else 'td'
        cells = self._split_row(row)
        # We use align here rather than cells to ensure every row
        # contains the same number of columns.
        for i, a in enumerate(align):
            c = etree.SubElement(tr, tag)
            # Rows shorter than the header are padded with empty cells.
            c.text = cells[i].strip(' ') if i < len(cells) else ""
            if a:
                c.set('align', a)

    def _split_row(self, row):
        """ split a row of text into list of cells.

        When the header row had a border pipe, a leading pipe and a trailing
        unescaped pipe are removed from ``row`` before splitting.
        """
        if self.border:
            if row.startswith('|'):
                row = row[1:]
            row = self.RE_END_BORDER.sub('', row)
        return self._split(row)

    def _split(self, row):
        """ split a row of text with some code into a list of cells.

        Pipes that are escaped or that fall inside a paired backtick region
        are not treated as cell delimiters. Always returns at least one
        element.
        """
        elements = []
        pipes = []
        tics = []
        tic_points = []
        tic_region = []
        good_pipes = []

        # Parse row
        # Throw out \\, and \|
        for m in self.RE_CODE_PIPES.finditer(row):
            # Store ` data (len, start_pos, end_pos)
            if m.group(2):
                # \`+
                # Store length of each tic group: subtract \
                tics.append(len(m.group(2)) - 1)
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(2), m.end(2) - 1, 1))
            elif m.group(3):
                # `+
                # Store length of each tic group
                tics.append(len(m.group(3)))
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(3), m.end(3) - 1, 0))
            # Store pipe location
            elif m.group(5):
                pipes.append(m.start(5))

        # Pair up tics according to size if possible
        # Subtract the escape length *only* from the opening.
        # Walk through tic list and see if tic has a close.
        # Store the tic region (start of region, end of region).
        pos = 0
        tic_len = len(tics)
        while pos < tic_len:
            try:
                tic_size = tics[pos] - tic_points[pos][2]
                if tic_size == 0:
                    # A lone escaped backtick cannot open a code span.
                    raise ValueError
                # ``list.index`` raises ValueError when no later group has
                # the same size, i.e. this opener has no close.
                index = tics[pos + 1:].index(tic_size) + 1
                tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
                pos += index + 1
            except ValueError:
                pos += 1

        # Resolve pipes. Check if they are within a tic pair region.
        # Walk through pipes comparing them to each region.
        #     - If pipe position is less than a region, it isn't in a region
        #     - If it is within a region, we don't want it, so throw it out
        #     - If we didn't throw it out, it must be a table pipe
        for pipe in pipes:
            throw_out = False
            for region in tic_region:
                if pipe < region[0]:
                    # Pipe is not in a region
                    break
                elif region[0] <= pipe <= region[1]:
                    # Pipe is within a code region. Throw it out.
                    throw_out = True
                    break
            if not throw_out:
                good_pipes.append(pipe)

        # Split row according to table delimiters.
        pos = 0
        for pipe in good_pipes:
            elements.append(row[pos:pipe])
            pos = pipe + 1
        elements.append(row[pos:])
        return elements
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 210 | |
| 211 | |
class TableExtension(Extension):
    """ Add tables to Markdown. """

    def extendMarkdown(self, md):
        """ Add an instance of TableProcessor to BlockParser.

        Also makes sure ``|`` is listed in ``md.ESCAPED_CHARS`` (presumably
        so a backslash-escaped pipe is rendered literally; confirm against
        the core escape handling). The processor is registered under the
        name ``'table'`` with ``75`` passed as its priority.
        """
        escaped_chars = md.ESCAPED_CHARS
        if '|' not in escaped_chars:
            escaped_chars.append('|')
        processor = TableProcessor(md.parser)
        md.parser.blockprocessors.register(processor, 'table', 75)
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 220 | |
| 221 | |
def makeExtension(**kwargs):  # pragma: no cover
    """ Return a TableExtension constructed from the given keyword arguments. """
    extension = TableExtension(**kwargs)
    return extension