blob: 4b027bb1f538f5bff0b621957d75ed86c844ec2c [file] [log] [blame]
dprankeb08af212015-10-06 17:44:36 -07001"""
2Tables Extension for Python-Markdown
3====================================
4
5Added parsing of tables to Python-Markdown.
6
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +00007See <https://Python-Markdown.github.io/extensions/tables>
dprankeb08af212015-10-06 17:44:36 -07008for documentation.
9
10Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
11
12All changes Copyright 2008-2014 The Python Markdown Project
13
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000014License: [BSD](https://opensource.org/licenses/bsd-license.php)
dprankeb08af212015-10-06 17:44:36 -070015
16"""
17
dprankeb08af212015-10-06 17:44:36 -070018from . import Extension
19from ..blockprocessors import BlockProcessor
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000020import xml.etree.ElementTree as etree
21import re
# Bit flags describing which outer border pipes a table's header row has.
# They are OR-ed together into ``TableProcessor.border``.
PIPE_NONE = 0
PIPE_LEFT = 1 << 0
PIPE_RIGHT = 1 << 1
dprankeb08af212015-10-06 17:44:36 -070025
26
class TableProcessor(BlockProcessor):
    """
    Block processor that turns pipe-delimited text blocks into HTML tables.

    `test` inspects the first two rows of a block and caches what it learns
    (`border` flags and the split `separator` row) so that `run` does not
    have to repeat that work.
    """

    # Matches, in order: `\\`, an escaped backtick run, a backtick run,
    # an escaped pipe, and a bare pipe.
    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
    # Matches an unescaped pipe at the very end of a row.
    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')

    def __init__(self, parser):
        # Per-block state; filled in by `test` and consumed by `run`.
        self.border = False
        self.separator = ''
        super().__init__(parser)

    def test(self, parent, block):
        """
        Return True when the block's first two rows form a table header.

        The first row is the column header and the second must be a separator
        row with the same number of cells, made up only of `|`, `:`, `-` and
        spaces. On success the border flags and the split separator row are
        stored on the instance for `run` to reuse.
        """
        lines = [line.strip(' ') for line in block.split('\n')]
        if len(lines) < 2:
            return False

        head = lines[0]
        self.border = PIPE_NONE
        if head.startswith('|'):
            self.border |= PIPE_LEFT
        if self.RE_END_BORDER.search(head) is not None:
            self.border |= PIPE_RIGHT
        column_count = len(self._split_row(head))

        if column_count > 1:
            looks_like_table = True
        elif column_count == 1 and self.border:
            # A single column table is only recognisable if every following
            # row carries at least one border pipe (leading or trailing).
            looks_like_table = all(
                line.startswith('|') or self.RE_END_BORDER.search(line) is not None
                for line in lines[1:]
            )
        else:
            looks_like_table = False

        if not looks_like_table:
            return False

        separator_cells = self._split_row(lines[1])
        if len(separator_cells) != column_count:
            return False
        if not set(''.join(separator_cells)) <= set('|:- '):
            return False

        self.separator = separator_cells
        return True

    def run(self, parent, blocks):
        """
        Consume the first block and append a `<table>` element to `parent`.

        Column alignment is derived from the separator row cached by `test`:
        a colon on both ends means center, leading colon left, trailing colon
        right, and no colon leaves the alignment unset.
        """
        lines = blocks.pop(0).split('\n')
        header_text = lines[0].strip(' ')
        # Line 0 is the header and line 1 the separator; the rest is the body.
        body_lines = lines[2:]

        # Keyed by (starts with ':', ends with ':').
        alignment_for = {
            (True, True): 'center',
            (True, False): 'left',
            (False, True): 'right',
            (False, False): None,
        }
        align = []
        for spec in self.separator:
            spec = spec.strip(' ')
            align.append(alignment_for[spec.startswith(':'), spec.endswith(':')])

        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
        self._build_row(header_text, thead, align)
        tbody = etree.SubElement(table, 'tbody')
        if body_lines:
            for line in body_lines:
                self._build_row(line.strip(' '), tbody, align)
        else:
            # A table with no body rows still gets one row of empty cells.
            self._build_empty_row(tbody, align)

    def _build_empty_row(self, parent, align):
        """ Append a `<tr>` holding one empty `<td>` per column. """
        tr = etree.SubElement(parent, 'tr')
        for _ in range(len(align)):
            etree.SubElement(tr, 'td')

    def _build_row(self, row, parent, align):
        """
        Append a `<tr>` to `parent` with one cell per entry in `align`.

        Cells are `<th>` inside a `<thead>` and `<td>` otherwise.
        """
        tr = etree.SubElement(parent, 'tr')
        cell_tag = 'th' if parent.tag == 'thead' else 'td'
        texts = self._split_row(row)
        available = len(texts)
        # Iterate over `align`, not the cells, so that every row ends up with
        # the same number of columns; missing cells become empty strings.
        for column, alignment in enumerate(align):
            cell = etree.SubElement(tr, cell_tag)
            cell.text = texts[column].strip(' ') if column < available else ""
            if alignment:
                cell.set('align', alignment)

    def _split_row(self, row):
        """ Strip the outer border pipes (if the table has any) and split. """
        if not self.border:
            return self._split(row)
        if row.startswith('|'):
            row = row[1:]
        return self._split(self.RE_END_BORDER.sub('', row))

    def _split(self, row):
        """
        Split a row into cell strings on pipes that are real delimiters.

        Escaped pipes and pipes that sit inside a backtick code span are not
        treated as delimiters.
        """
        run_lengths = []     # number of backticks in each backtick run
        run_spans = []       # (start, end, escape length) for each run
        pipe_positions = []  # positions of all unescaped pipes

        # Scan the row. `\\` and `\|` matches (groups 1 and 4) are skipped.
        # Exactly one alternative matches, so `lastindex` identifies it.
        for m in self.RE_CODE_PIPES.finditer(row):
            which = m.lastindex
            if which == 2:
                # \`+ : the backslash is not a backtick, so subtract it.
                run_lengths.append(len(m.group(2)) - 1)
                run_spans.append((m.start(2), m.end(2) - 1, 1))
            elif which == 3:
                # `+
                run_lengths.append(len(m.group(3)))
                run_spans.append((m.start(3), m.end(3) - 1, 0))
            elif which == 5:
                pipe_positions.append(m.start(5))

        # Pair each opening run with the next run of matching size. The
        # escape length is subtracted from the opening run only. A run with
        # no partner is skipped.
        code_regions = []
        total_runs = len(run_lengths)
        cursor = 0
        while cursor < total_runs:
            open_start, _, escape_len = run_spans[cursor]
            wanted = run_lengths[cursor] - escape_len
            closer = None
            if wanted != 0:
                for candidate in range(cursor + 1, total_runs):
                    if run_lengths[candidate] == wanted:
                        closer = candidate
                        break
            if closer is None:
                cursor += 1
            else:
                code_regions.append((open_start, run_spans[closer][1]))
                cursor = closer + 1

        # Keep only the pipes that fall outside every code region.
        delimiters = []
        for pipe in pipe_positions:
            for region_start, region_end in code_regions:
                if pipe < region_start:
                    # Before this region, so it is not inside a code span.
                    delimiters.append(pipe)
                    break
                if pipe <= region_end:
                    # Inside a code span: not a table delimiter.
                    break
            else:
                delimiters.append(pipe)

        # Slice the row between consecutive delimiters.
        cell_starts = [0] + [pipe + 1 for pipe in delimiters]
        cell_ends = delimiters + [len(row)]
        return [row[begin:end] for begin, end in zip(cell_starts, cell_ends)]
dprankeb08af212015-10-06 17:44:36 -0700210
211
class TableExtension(Extension):
    """ Markdown extension that enables table parsing. """

    def extendMarkdown(self, md):
        """
        Hook table support into the given Markdown instance.

        Adds `|` to `md.ESCAPED_CHARS` if it is not already there, then
        registers a `TableProcessor` under the name 'table' with priority 75.
        """
        if '|' not in md.ESCAPED_CHARS:
            md.ESCAPED_CHARS.append('|')
        md.parser.blockprocessors.register(
            TableProcessor(md.parser),
            'table',
            75,
        )
dprankeb08af212015-10-06 17:44:36 -0700220
221
def makeExtension(**kwargs):  # pragma: no cover
    """ Build and return a `TableExtension` from the given keyword arguments. """
    extension = TableExtension(**kwargs)
    return extension