Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 1 | """ |
| 2 | Python Markdown |
| 3 | |
| 4 | A Python implementation of John Gruber's Markdown. |
| 5 | |
| 6 | Documentation: https://python-markdown.github.io/ |
| 7 | GitHub: https://github.com/Python-Markdown/markdown/ |
| 8 | PyPI: https://pypi.org/project/Markdown/ |
| 9 | |
| 10 | Started by Manfred Stienstra (http://www.dwerg.net/). |
| 11 | Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). |
| 12 | Currently maintained by Waylan Limberg (https://github.com/waylan), |
| 13 | Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). |
| 14 | |
| 15 | Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) |
| 16 | Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) |
| 17 | Copyright 2004 Manfred Stienstra (the original version) |
| 18 | |
| 19 | License: BSD (see LICENSE.md for details). |
| 20 | """ |
| 21 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 22 | import re |
| 23 | import sys |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 24 | from collections import namedtuple |
| 25 | from functools import wraps |
| 26 | import warnings |
| 27 | import xml.etree.ElementTree |
| 28 | from .pep562 import Pep562 |
| 29 | from itertools import count |
| 30 | |
| 31 | try: |
| 32 | from importlib import metadata |
| 33 | except ImportError: |
| 34 | # <PY38 use backport |
| 35 | import importlib_metadata as metadata |
| 36 | |
# True when running on Python 3.7 or newer.
PY37 = sys.version_info >= (3, 7)


# TODO: Remove deprecated variables in a future release.
# Maps each deprecated module attribute to a pair of
# (name of the replacement to show in the warning, replacement object).
# The module-level `__getattr__` looks names up in this table.
__deprecated__ = {
    'etree': (
        'xml.etree.ElementTree',
        xml.etree.ElementTree,
    ),
    'string_type': ('str', str),
    'text_type': ('str', str),
    'int2str': ('chr', chr),
    'iterrange': ('range', range),
}
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 48 | |
| 49 | |
| 50 | """ |
| 51 | Constants you might want to modify |
| 52 | ----------------------------------------------------------------------------- |
| 53 | """ |
| 54 | |
| 55 | |
BLOCK_LEVEL_ELEMENTS = [
    # Elements which are invalid to wrap in a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    'address', 'article', 'aside', 'blockquote',
    'details', 'div', 'dl',
    'fieldset', 'figcaption', 'figure', 'footer', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
    'main', 'menu', 'nav', 'ol',
    'p', 'pre', 'section', 'table', 'ul',
    # Other elements which Markdown should not be mucking up the contents of.
    'canvas', 'colgroup', 'dd', 'body', 'dt', 'group',
    'iframe', 'li', 'legend',
    'math', 'map', 'noscript',
    'output', 'object', 'option', 'progress', 'script',
    'style', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr',
    'video',
]
| 68 | |
# Placeholders
# Every placeholder is wrapped in the STX/ETX control characters.
STX = '\u0002'  # STX ("Start of text") marks start-of-placeholder
ETX = '\u0003'  # ETX ("End of text") marks end-of-placeholder

# Inline placeholders: prefix, template (takes an id) and matching regex.
INLINE_PLACEHOLDER_PREFIX = STX + "klzzwxh:"
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')

# Stand-in for an ampersand.
AMP_SUBSTITUTE = STX + "amp" + ETX

# Stashed raw HTML: template (takes an id) and matching regex.
HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')

# Stashed tag data: template (takes an id).
TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX
| 79 | |
| 80 | |
| 81 | """ |
| 82 | Constants you probably do not need to change |
| 83 | ----------------------------------------------------------------------------- |
| 84 | """ |
| 85 | |
# Only load extension entry_points once.
# Older `entry_points()` implementations return a plain dict keyed by group
# name. Newer ones (Python 3.10+ and recent `importlib_metadata` backports)
# return an object offering `select()`; the dict-style `.get()` is deprecated
# there and absent on Python 3.12+, so prefer `select()` whenever it exists.
_entry_points = metadata.entry_points()
if hasattr(_entry_points, 'select'):
    INSTALLED_EXTENSIONS = _entry_points.select(group='markdown.extensions')
else:
    INSTALLED_EXTENSIONS = _entry_points.get('markdown.extensions', ())

# Inclusive (first, last) code point pairs.
RTL_BIDI_RANGES = (
    ('\u0590', '\u07FF'),
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u2D30', '\u2D7F')  # Tifinagh
)
| 95 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 96 | |
| 97 | """ |
| 98 | AUXILIARY GLOBAL FUNCTIONS |
| 99 | ============================================================================= |
| 100 | """ |
| 101 | |
| 102 | |
def deprecated(message, stacklevel=2):
    """
    Build a decorator that emits a DeprecationWarning on every call.

    The warning text is "'<function name>' is deprecated. <message>".
    `stacklevel` is handed straight to `warnings.warn`. The wrapped
    callable is still invoked and its result returned unchanged.

    Borrowed from https://stackoverflow.com/a/48632082/866026
    """
    def decorate(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            text = "'{}' is deprecated. {}".format(func.__name__, message)
            warnings.warn(text, category=DeprecationWarning, stacklevel=stacklevel)
            return func(*args, **kwargs)
        return wrapper
    return decorate
| 120 | |
| 121 | |
@deprecated("Use 'Markdown.is_block_level' instead.")
def isBlockLevel(tag):
    """
    Check if the tag is a block level HTML tag.

    String tags are lower-cased and stripped of trailing slashes before
    being looked up in `BLOCK_LEVEL_ELEMENTS`.
    """
    # Some ElementTree tags are not strings; those are never block level.
    if not isinstance(tag, str):
        return False
    normalized = tag.lower().rstrip('/')
    return normalized in BLOCK_LEVEL_ELEMENTS
| 129 | |
| 130 | |
def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
    """
    Parse a string representing a bool value.

    Non-string values are converted with `bool()`, except that `None` is
    returned as-is when `preserve_none=True`. Strings are matched
    case-insensitively: 'true', 'yes', 'y', 'on', '1' give True and
    'false', 'no', 'n', 'off', '0', 'none' give False. With
    `preserve_none=True` the string 'none' gives None instead.

    Any other string raises ValueError or, if `fail_on_errors=False`,
    returns None.
    """
    if not isinstance(value, str):
        if preserve_none and value is None:
            return None
        return bool(value)

    lowered = value.lower()
    # Must be checked before the falsy words, which also contain 'none'.
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None
| 148 | |
| 149 | |
def code_escape(text):
    """
    Escape code by replacing `&`, `<` and `>` with their HTML entities.

    Returns the escaped string; text containing none of the three
    characters is returned unchanged.
    """
    # The ampersand must be handled first, otherwise the `&` introduced by
    # the `&lt;` / `&gt;` replacements would itself be escaped again.
    if "&" in text:
        text = text.replace("&", "&amp;")
    if "<" in text:
        text = text.replace("<", "&lt;")
    if ">" in text:
        text = text.replace(">", "&gt;")
    return text
| 159 | |
| 160 | |
| 161 | def _get_stack_depth(size=2): |
| 162 | """Get stack size for caller's frame. |
| 163 | See https://stackoverflow.com/a/47956089/866026 |
| 164 | """ |
| 165 | frame = sys._getframe(size) |
| 166 | |
| 167 | for size in count(size): |
| 168 | frame = frame.f_back |
| 169 | if not frame: |
| 170 | return size |
| 171 | |
| 172 | |
def nearing_recursion_limit():
    """Return true if current stack depth is within 100 of maximum limit."""
    headroom = sys.getrecursionlimit() - _get_stack_depth()
    return headroom < 100
| 176 | |
| 177 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 178 | """ |
| 179 | MISC AUXILIARY CLASSES |
| 180 | ============================================================================= |
| 181 | """ |
| 182 | |
| 183 | |
class AtomicString(str):
    """A string which should not be further processed.

    A plain `str` subclass that adds no behaviour of its own; the type
    itself is the marker.
    """
| 187 | |
| 188 | |
class Processor:
    """Holds the `md` object a processor works for."""

    def __init__(self, md=None):
        """Store `md` (presumably the owning Markdown instance) on `self.md`."""
        self.md = md

    # TODO: remove this later
    @property
    @deprecated("Use 'md' instead.")
    def markdown(self):
        """Deprecated alias for `md`; warns on every access."""
        return self.md
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 198 | |
| 199 | |
class HtmlStash:
    """
    Stash for HTML objects that are extracted at the beginning and
    replaced with place-holders.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        # Raw HTML segments and the number stored so far.
        self.rawHtmlBlocks = []
        self.html_counter = 0  # for counting inline html segments
        # Tag records and the number stored so far.
        self.tag_data = []  # list of dictionaries in the order tags appear
        self.tag_counter = 0

    def store(self, html):
        """
        Save an HTML segment for later reinsertion.

        Keyword arguments:

        * html: an html segment

        Returns : a placeholder string that needs to be inserted into
        the document.

        """
        self.rawHtmlBlocks.append(html)
        marker = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return marker

    def reset(self):
        """Empty the raw HTML list and zero its counter.

        NOTE(review): the tag counter and tag data are not touched here.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []

    def get_placeholder(self, key):
        """Return the HTML placeholder string for `key`."""
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag, attrs, left_index, right_index):
        """Store tag data and return a placeholder."""
        record = {
            'tag': tag,
            'attrs': attrs,
            'left_index': left_index,
            'right_index': right_index,
        }
        self.tag_data.append(record)
        marker = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in self.tag_data
        return marker
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 246 | |
| 247 | |
# Record type `Registry` keeps for every entry of its sorted list.
# Named fields read better when editing the code later:
# `item.name` is clearer than `item[0]`.
_PriorityItem = namedtuple('PriorityItem', ['name', 'priority'])


class Registry:
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of the
    registry: `register` and `deregister`. Use `register` to add items and
    `deregister` to remove items. See each method for specifics.

    When registering an item, a "name" and a "priority" must be provided. All
    items are automatically sorted by "priority" from highest to lowest. The
    "name" is used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        self._data = {}           # name -> registered item
        self._priority = []       # `_PriorityItem` records, sorted lazily
        self._is_sorted = False   # whether `_priority` is currently in order

    def __contains__(self, item):
        # Strings are treated as names, anything else as an item instance.
        if isinstance(item, str):
            pool = self._data.keys()
        else:
            pool = self._data.values()
        return item in pool

    def __iter__(self):
        self._sort()
        return iter([self._data[entry.name] for entry in self._priority])

    def __getitem__(self, key):
        self._sort()
        if isinstance(key, slice):
            # A slice yields a fresh registry holding the selected entries.
            subset = Registry()
            for entry in self._priority[key]:
                subset.register(self._data[entry.name], entry.name, entry.priority)
            return subset
        if isinstance(key, int):
            return self._data[self._priority[key].name]
        return self._data[key]

    def __len__(self):
        return len(self._priority)

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, list(self))

    def get_index_for_name(self, name):
        """
        Return the index of the given name.

        Raises ValueError when the registry does not contain `name`.
        """
        if name not in self:
            raise ValueError('No item named "{}" exists.'.format(name))
        self._sort()
        positions = [
            index for index, entry in enumerate(self._priority)
            if entry.name == name
        ]
        return positions[0]

    def register(self, item, name, priority):
        """
        Add an item to the registry with the given name and priority.

        Parameters:

        * `item`: The item being registered.
        * `name`: A string used to reference the item.
        * `priority`: An integer or float used to sort against all items.

        If an item is registered with a "name" which already exists, the
        existing item is replaced with the new item. Tread carefully as the
        old item is lost with no way to recover it. The new item will be
        sorted according to its priority and will **not** retain the position
        of the old item.
        """
        if name in self:
            # Remove existing item of same name first
            self.deregister(name)
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name=name, priority=priority))

    def deregister(self, name, strict=True):
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently.
        """
        try:
            position = self.get_index_for_name(name)
        except ValueError:
            if strict:
                raise
            return
        del self._priority[position]
        del self._data[name]

    def _sort(self):
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if self._is_sorted:
            return
        self._priority.sort(key=lambda item: item.priority, reverse=True)
        self._is_sorted = True

    # Deprecated Methods which provide a smooth transition from OrderedDict

    def __setitem__(self, key, value):
        """ Register item with priority 5 less than lowest existing priority. """
        if not isinstance(key, str):
            raise TypeError
        warnings.warn(
            'Using setitem to register a processor or pattern is deprecated. '
            'Use the `register` method instead.',
            DeprecationWarning,
            stacklevel=2,
        )
        if key in self:
            # Key already exists, replace without altering priority
            self._data[key] = value
            return
        if len(self) == 0:
            # This is the first item. Set priority to 50.
            priority = 50
        else:
            self._sort()
            priority = self._priority[-1].priority - 5
        self.register(value, key, priority)

    def __delitem__(self, key):
        """ Deregister an item by name. """
        if key not in self:
            raise KeyError('Cannot delete key {}, not registered.'.format(key))
        self.deregister(key)
        warnings.warn(
            'Using del to remove a processor or pattern is deprecated. '
            'Use the `deregister` method instead.',
            DeprecationWarning,
            stacklevel=2,
        )

    def add(self, key, value, location):
        """ Register a key by location. """
        if len(self) == 0:
            # This is the first item. Set priority to 50.
            priority = 50
        elif location == '_begin':
            self._sort()
            # Set priority 5 greater than highest existing priority
            priority = self._priority[0].priority + 5
        elif location == '_end':
            self._sort()
            # Set priority 5 less than lowest existing priority
            priority = self._priority[-1].priority - 5
        elif location.startswith(('<', '>')):
            # Set priority halfway between existing priorities.
            anchor = self.get_index_for_name(location[1:])
            if location.startswith('<'):
                after = self._priority[anchor].priority
                # When the anchor is the first item there is no neighbour.
                if anchor > 0:
                    before = self._priority[anchor - 1].priority
                else:
                    before = after + 10
            else:
                # location.startswith('>')
                before = self._priority[anchor].priority
                # When the anchor is the last item there is no neighbour.
                if anchor < len(self) - 1:
                    after = self._priority[anchor + 1].priority
                else:
                    after = before - 10
            priority = before - ((before - after) / 2)
        else:
            raise ValueError('Not a valid location: "%s". Location key '
                             'must start with a ">" or "<".' % location)
        self.register(value, key, priority)
        warnings.warn(
            'Using the add method to register a processor or pattern is deprecated. '
            'Use the `register` method instead.',
            DeprecationWarning,
            stacklevel=2,
        )
| 465 | |
| 466 | |
def __getattr__(name):
    """
    Get attribute.

    Names listed in `__deprecated__` resolve to their replacement object
    after a DeprecationWarning is issued; any other name raises
    AttributeError.
    """
    entry = __deprecated__.get(name)
    if not entry:
        raise AttributeError("module '{}' has no attribute '{}'".format(__name__, name))

    replacement_name, replacement = entry
    warnings.warn(
        "'{}' is deprecated. Use '{}' instead.".format(name, replacement_name),
        category=DeprecationWarning,
        stacklevel=(3 if PY37 else 4)
    )
    return replacement


# Module-level `__getattr__` (PEP 562) is only native from Python 3.7 on.
# NOTE(review): `Pep562` presumably emulates it on older interpreters - confirm.
if not PY37:
    Pep562(__name__)