blob: 2cb2317a25775aa013fcda5f1e501e38c13e422a [file] [log] [blame]
"""
Python Markdown

A Python implementation of John Gruber's Markdown.

Documentation: https://python-markdown.github.io/
GitHub: https://github.com/Python-Markdown/markdown/
PyPI: https://pypi.org/project/Markdown/

Started by Manfred Stienstra (http://www.dwerg.net/).
Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
Currently maintained by Waylan Limberg (https://github.com/waylan),
Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
Copyright 2004 Manfred Stienstra (the original version)

License: BSD (see LICENSE.md for details).
"""
21
dprankeb08af212015-10-06 17:44:36 -070022import re
23import sys
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000024from collections import namedtuple
25from functools import wraps
26import warnings
27import xml.etree.ElementTree
28from .pep562 import Pep562
29from itertools import count
30
31try:
32 from importlib import metadata
33except ImportError:
34 # <PY38 use backport
35 import importlib_metadata as metadata
36
# True when the running interpreter reports version 3.7 or newer.
PY37 = sys.version_info >= (3, 7)


# TODO: Remove deprecated variables in a future release.
# Each key is a deprecated module attribute name; each value is a pair of
# (name of the replacement to mention, the replacement object itself).
__deprecated__ = dict(
    etree=('xml.etree.ElementTree', xml.etree.ElementTree),
    string_type=('str', str),
    text_type=('str', str),
    int2str=('chr', chr),
    iterrange=('range', range),
)
dprankeb08af212015-10-06 17:44:36 -070048
49
"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""
54
55
BLOCK_LEVEL_ELEMENTS = [
    # Elements which are invalid to wrap in a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    'address', 'article', 'aside', 'blockquote',
    'details', 'div', 'dl',
    'fieldset', 'figcaption', 'figure', 'footer', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
    'main', 'menu', 'nav', 'ol',
    'p', 'pre', 'section', 'table', 'ul',
    # Other elements which Markdown should not be mucking up the contents of.
    'canvas', 'colgroup', 'dd', 'body', 'dt',
    'group', 'iframe', 'li', 'legend',
    'math', 'map', 'noscript',
    'output', 'object', 'option', 'progress',
    'script', 'style',
    'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr',
    'video',
]
68
# Placeholders
STX = '\u0002'  # "Start of text" control character: opens every placeholder
ETX = '\u0003'  # "End of text" control character: closes every placeholder

# Inline placeholders: STX + "klzzwxh:" + <id> + ETX
INLINE_PLACEHOLDER_PREFIX = STX + "klzzwxh:"
INLINE_PLACEHOLDER = "{}%s{}".format(INLINE_PLACEHOLDER_PREFIX, ETX)
# Matches an inline placeholder and captures its numeric id.
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')

AMP_SUBSTITUTE = "".join((STX, "amp", ETX))

# HTML placeholders: STX + "wzxhzdk:" + <id> + ETX
HTML_PLACEHOLDER = "{}wzxhzdk:%s{}".format(STX, ETX)
# Matches an HTML placeholder and captures its numeric id.
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')

# Tag placeholders: STX + "hzzhzkh:" + <id> + ETX
TAG_PLACEHOLDER = "{}hzzhzkh:%s{}".format(STX, ETX)
79
80
"""
Constants you probably do not need to change
-----------------------------------------------------------------------------
"""
85
# Only load extension entry_points once.
# The dict-style `.get()` on the result of `entry_points()` was deprecated in
# Python 3.10 and removed in 3.12. Prefer the `select()` API whenever the
# returned object offers it, and keep `.get()` only for older interpreters
# and older `importlib_metadata` backports that still return a plain mapping.
_entry_points = metadata.entry_points()
if hasattr(_entry_points, 'select'):
    INSTALLED_EXTENSIONS = _entry_points.select(group='markdown.extensions')
else:
    INSTALLED_EXTENSIONS = _entry_points.get('markdown.extensions', ())
del _entry_points  # temporary helper; keep it out of the module namespace

# Pairs of (first, last) code points, inclusive.
RTL_BIDI_RANGES = (
    ('\u0590', '\u07FF'),
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u2D30', '\u2D7F')  # Tifinagh
)
95
dprankeb08af212015-10-06 17:44:36 -070096
"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""
101
102
def deprecated(message, stacklevel=2):
    """
    Build a decorator that emits a DeprecationWarning on every call.

    The warning text is "'<function name>' is deprecated. <message>" and is
    issued with the given `stacklevel` before the wrapped callable runs.
    The wrapped callable's return value is passed through unchanged.

    Borrowed from https://stackoverflow.com/a/48632082/866026
    """
    def decorate(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            notice = "'{}' is deprecated. {}".format(func.__name__, message)
            warnings.warn(notice, category=DeprecationWarning, stacklevel=stacklevel)
            return func(*args, **kwargs)
        return wrapper
    return decorate
120
121
@deprecated("Use 'Markdown.is_block_level' instead.")
def isBlockLevel(tag):
    """Return True when `tag` names one of `BLOCK_LEVEL_ELEMENTS`.

    The comparison is case-insensitive and ignores trailing `/` characters.
    """
    # Some ElementTree tags are not strings, so they can never match.
    if not isinstance(tag, str):
        return False
    return tag.lower().rstrip('/') in BLOCK_LEVEL_ELEMENTS
129
130
def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
    """Parse a string representing a bool value.

    Non-string values are converted with `bool()`, except that `None` is
    returned as-is when `preserve_none=True`. Strings are matched
    case-insensitively: 'true', 'yes', 'y', 'on', '1' give True; 'false',
    'no', 'n', 'off', '0', 'none' give False. With `preserve_none=True`
    the string 'none' gives None instead. Any other string raises
    ValueError, or returns None when `fail_on_errors=False`."""
    if not isinstance(value, str):
        if value is None and preserve_none:
            return None
        return bool(value)

    lowered = value.lower()
    # The 'none' check must come first: 'none' is also in the false list.
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None
148
149
def code_escape(text):
    """Escape `&`, `<` and `>` in `text` as HTML entities."""
    # `&` must be handled first so the entities added afterwards are not
    # escaped a second time.
    for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        if char in text:
            text = text.replace(char, entity)
    return text
159
160
161def _get_stack_depth(size=2):
162 """Get stack size for caller's frame.
163 See https://stackoverflow.com/a/47956089/866026
164 """
165 frame = sys._getframe(size)
166
167 for size in count(size):
168 frame = frame.f_back
169 if not frame:
170 return size
171
172
def nearing_recursion_limit():
    """Return true if current stack depth is within 100 of maximum limit."""
    # `_get_stack_depth()` must be called directly from this function: its
    # default `size=2` starts counting from this function's caller.
    headroom = sys.getrecursionlimit() - _get_stack_depth()
    return headroom < 100
176
177
"""
MISC AUXILIARY CLASSES
=============================================================================
"""
182
183
class AtomicString(str):
    """A string which should not be further processed.

    Adds nothing to `str`; the subclass only serves as a marker type.
    """
187
188
class Processor:
    """Base class that stores the `md` object it was created with."""

    def __init__(self, md=None):
        # Keep whatever was passed in (`None` by default) as `self.md`.
        self.md = md

    @property
    @deprecated("Use 'md' instead.")
    def markdown(self):
        """Deprecated alias for `self.md`; each access emits a warning."""
        # TODO: remove this later
        return self.md
dprankeb08af212015-10-06 17:44:36 -0700198
199
class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []  # list of dictionaries in the order tags appear

    def store(self, html):
        """
        Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:

        * html: an html segment

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append(html)
        # The placeholder carries the index the segment now has in
        # `rawHtmlBlocks`.
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self):
        """
        Return the stash to the empty state created by `__init__`.

        Both the raw HTML store and the tag store are cleared. Previously
        only the raw HTML store was reset, so a reused stash kept stale
        `tag_data` entries and an ever-growing `tag_counter`.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key):
        """Return the HTML placeholder string for the given key."""
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag, attrs, left_index, right_index):
        """Store tag data and return a placeholder."""
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in self.tag_data
        return placeholder
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +0000246
247
# Used internally by `Registry` for each item in its sorted list.
# Provides an easier to read API when editing the code later.
# For example, `item.name` is more clear than `item[0]`.
_PriorityItem = namedtuple('PriorityItem', ['name', 'priority'])


class Registry:
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of
    the registry: `register` adds items and `deregister` removes them. See
    each method for specifics.

    Every registered item has a "name" and a "priority". Items are sorted
    by "priority" from highest to lowest. The "name" is used to remove
    ("deregister") and to get items.

    For reading, a `Registry` instance behaves like an ordered list. You may
    iterate over the items, get an item, get a count (length) of all items
    and check that the registry contains an item.

    An item may be fetched either by its index or by its string "name":

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    A containment check accepts either the string "name" or a reference to
    the actual item:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` returns the index of an item given that
    item's assigned "name".
    """

    def __init__(self):
        self._data = {}          # name -> item
        self._priority = []      # `_PriorityItem` entries, sorted lazily
        self._is_sorted = False  # whether `_priority` is currently in order

    def __contains__(self, item):
        # Strings are treated as names; anything else is compared against
        # the registered items themselves.
        haystack = self._data if isinstance(item, str) else self._data.values()
        return item in haystack

    def __iter__(self):
        self._sort()
        return iter([self._data[entry.name] for entry in self._priority])

    def __getitem__(self, key):
        self._sort()
        if isinstance(key, slice):
            # A slice yields a new registry holding the selected entries.
            subset = Registry()
            for entry in self._priority[key]:
                subset.register(self._data[entry.name], entry.name, entry.priority)
            return subset
        if isinstance(key, int):
            # Translate the position into the name stored there.
            key = self._priority[key].name
        return self._data[key]

    def __len__(self):
        return len(self._priority)

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, list(self))

    def get_index_for_name(self, name):
        """
        Return the index of the given name.

        Raises `ValueError` when `name` is not in the registry.
        """
        if name not in self:
            raise ValueError('No item named "{}" exists.'.format(name))
        self._sort()
        positions = [
            index for index, entry in enumerate(self._priority)
            if entry.name == name
        ]
        # Taking the first hit by subscript keeps the `IndexError` that is
        # raised when `name` passed the containment check as an item
        # instance rather than as a name.
        return positions[0]

    def register(self, item, name, priority):
        """
        Add an item to the registry with the given name and priority.

        Parameters:

        * `item`: The item being registered.
        * `name`: A string used to reference the item.
        * `priority`: An integer or float used to sort against all items.

        Registering under a "name" which already exists replaces the
        existing item. Tread carefully as the old item is lost with no way
        to recover it. The new item is sorted according to its own priority
        and does **not** retain the position of the old item.
        """
        if name in self:
            # Remove existing item of same name first
            self.deregister(name)
        entry = _PriorityItem(name, priority)
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(entry)

    def deregister(self, name, strict=True):
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently.
        """
        try:
            position = self.get_index_for_name(name)
            self._priority.pop(position)
            self._data.pop(name)
        except ValueError:
            if strict:
                raise

    def _sort(self):
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if self._is_sorted:
            return
        self._priority.sort(key=lambda item: item.priority, reverse=True)
        self._is_sorted = True

    # Deprecated Methods which provide a smooth transition from OrderedDict

    def __setitem__(self, key, value):
        """ Register item with priority 5 less than lowest existing priority. """
        if not isinstance(key, str):
            raise TypeError
        warnings.warn(
            'Using setitem to register a processor or pattern is deprecated. '
            'Use the `register` method instead.',
            DeprecationWarning,
            stacklevel=2,
        )
        if key in self:
            # Key already exists, replace without altering priority
            self._data[key] = value
            return
        if len(self):
            self._sort()
            priority = self._priority[-1].priority - 5
        else:
            # This is the first item. Set priority to 50.
            priority = 50
        self.register(value, key, priority)

    def __delitem__(self, key):
        """ Deregister an item by name. """
        if key not in self:
            raise KeyError('Cannot delete key {}, not registered.'.format(key))
        self.deregister(key)
        warnings.warn(
            'Using del to remove a processor or pattern is deprecated. '
            'Use the `deregister` method instead.',
            DeprecationWarning,
            stacklevel=2,
        )

    def add(self, key, value, location):
        """ Register a key by location. """
        if len(self) == 0:
            # This is the first item. Set priority to 50.
            priority = 50
        elif location == '_begin':
            self._sort()
            # Set priority 5 greater than highest existing priority
            priority = self._priority[0].priority + 5
        elif location == '_end':
            self._sort()
            # Set priority 5 less than lowest existing priority
            priority = self._priority[-1].priority - 5
        elif location.startswith(('<', '>')):
            # Set priority halfway between existing priorities.
            anchor = self.get_index_for_name(location[1:])
            if location.startswith('<'):
                after = self._priority[anchor].priority
                # When the anchor is the first item there is no neighbour
                # above it, so use a gap of 10.
                before = self._priority[anchor - 1].priority if anchor > 0 else after + 10
            else:
                before = self._priority[anchor].priority
                # When the anchor is the last item there is no neighbour
                # below it, so use a gap of 10.
                is_last = anchor >= len(self) - 1
                after = before - 10 if is_last else self._priority[anchor + 1].priority
            priority = before - ((before - after) / 2)
        else:
            raise ValueError('Not a valid location: "%s". Location key '
                             'must start with a ">" or "<".' % location)
        self.register(value, key, priority)
        warnings.warn(
            'Using the add method to register a processor or pattern is deprecated. '
            'Use the `register` method instead.',
            DeprecationWarning,
            stacklevel=2,
        )
465
466
def __getattr__(name):
    """Get attribute.

    Looks `name` up in `__deprecated__`. A known name emits a
    `DeprecationWarning` that names its replacement and returns the
    replacement object; any other name raises `AttributeError`.
    """
    entry = __deprecated__.get(name)
    if not entry:
        raise AttributeError("module '{}' has no attribute '{}'".format(__name__, name))

    # entry[0] is the replacement's name, entry[1] the replacement object.
    warnings.warn(
        "'{}' is deprecated. Use '{}' instead.".format(name, entry[0]),
        category=DeprecationWarning,
        stacklevel=(3 if PY37 else 4)
    )
    return entry[1]
479
480
# When the `PY37` flag is false, hand this module's name to the `Pep562`
# helper. NOTE(review): presumably this emulates the module-level
# `__getattr__` hook (PEP 562) on older interpreters -- confirm against
# the `.pep562` module.
if not PY37:
    Pep562(__name__)