dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame^] | 1 | # -*- coding: utf-8 -*- |
| 2 | from __future__ import unicode_literals |
| 3 | import re |
| 4 | import sys |
| 5 | |
| 6 | |
| 7 | """ |
| 8 | Python 3 Stuff |
| 9 | ============================================================================= |
| 10 | """ |
# True when the running interpreter's major version is 3.
PY3 = sys.version_info[0] == 3

# Version-neutral aliases used by the rest of the module:
#   string_type -- base class to test "is this a string?" against
#   text_type   -- the text (unicode) string type
#   int2str     -- converts an integer code point to a one-character string
if not PY3:  # pragma: no cover
    string_type, text_type, int2str = basestring, unicode, unichr  # noqa
else:  # pragma: no cover
    string_type = text_type = str
    int2str = chr
| 21 | |
| 22 | |
| 23 | """ |
| 24 | Constants you might want to modify |
| 25 | ----------------------------------------------------------------------------- |
| 26 | """ |
| 27 | |
| 28 | |
# Case-insensitive pattern matching a complete tag name that is treated as
# a block level element. Add or remove names in the tuple to adjust it.
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^(" + "|".join((
        "p", "div", "h[1-6]", "blockquote", "pre", "table", "dl", "ol", "ul",
        "script", "noscript", "form", "fieldset", "iframe", "math",
        "hr", "hr/", "style", "li", "dt", "dd", "thead", "tbody",
        "tr", "th", "td", "section", "footer", "header", "group", "figure",
        "figcaption", "aside", "article", "canvas", "output",
        "progress", "video", "nav",
    )) + ")$",
    re.IGNORECASE
)

# Placeholders
# Every placeholder is wrapped in a pair of control characters:
# STX ("Start of text") opens it and ETX ("End of text") closes it.
STX = '\u0002'
ETX = '\u0003'

# Inline placeholders: the "%s" slot takes the id, and the compiled
# pattern captures a numeric id in group 1.
INLINE_PLACEHOLDER_PREFIX = "".join((STX, "klzzwxh:"))
INLINE_PLACEHOLDER = "".join((INLINE_PLACEHOLDER_PREFIX, "%s", ETX))
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')

# Stand-in for an ampersand.
AMP_SUBSTITUTE = "".join((STX, "amp", ETX))

# Placeholders for stashed raw HTML (numeric id captured in group 1 of the
# compiled pattern) and for stashed tag data.
HTML_PLACEHOLDER = "".join((STX, "wzxhzdk:%s", ETX))
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
TAG_PLACEHOLDER = "".join((STX, "hzzhzkh:%s", ETX))
| 48 | |
| 49 | |
| 50 | """ |
| 51 | Constants you probably do not need to change |
| 52 | ----------------------------------------------------------------------------- |
| 53 | """ |
| 54 | |
# Pairs of (first, last) characters delimiting the script ranges listed
# beside each entry.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)
| 62 | |
# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`). Do not import ElementTree yourself.
| 65 | |
def _etree_version(module):
    """Return `module.VERSION` as a tuple of ints, e.g. "1.3.0" -> (1, 3, 0).

    Only the leading dotted-number part of the string is used. An empty
    tuple is returned when the string does not start with a digit, which
    compares lower than any required version.
    """
    leading = re.match(r'\d+(?:\.\d+)*', module.VERSION)
    if leading is None:
        return ()
    return tuple(int(part) for part in leading.group().split('.'))


try:  # pragma: no cover
    # Is the C implementation of ElementTree available?
    import xml.etree.cElementTree as etree
    from xml.etree.ElementTree import Comment
    # Serializers (including ours) test with non-c Comment
    etree.test_comment = Comment
    # Compare versions numerically: as plain strings "1.0.10" would sort
    # before "1.0.5" and be rejected as too old.
    if _etree_version(etree) < (1, 0, 5):
        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):  # pragma: no cover
    # Use the Python implementation of ElementTree?
    import xml.etree.ElementTree as etree
    if _etree_version(etree) < (1, 1):
        raise RuntimeError("ElementTree version 1.1 or higher is required")
| 79 | |
| 80 | |
| 81 | """ |
| 82 | AUXILIARY GLOBAL FUNCTIONS |
| 83 | ============================================================================= |
| 84 | """ |
| 85 | |
| 86 | |
def isBlockLevel(tag):
    """Check if the tag is a block level HTML tag.

    For a string `tag`, returns the result of matching it against
    BLOCK_LEVEL_ELEMENTS: a match object when it is block level, otherwise
    None. For any non-string `tag`, returns False.
    """
    if not isinstance(tag, string_type):
        # Some ElementTree tags are not strings, so return False.
        return False
    return BLOCK_LEVEL_ELEMENTS.match(tag)
| 93 | |
| 94 | |
def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
    """Interpret `value` as a boolean.

    Non-string values are converted with `bool()`, except that None is
    passed through unchanged when `preserve_none` is true.

    String values are compared case-insensitively:

    * 'true', 'yes', 'y', 'on', '1' give True
    * 'false', 'no', 'n', 'off', '0', 'none' give False
    * 'none' gives None instead of False when `preserve_none` is true

    Any other string raises ValueError, or returns None when
    `fail_on_errors` is false.
    """
    if not isinstance(value, string_type):
        if value is None and preserve_none:
            return None
        return bool(value)

    lowered = value.lower()
    # The preserve_none check has to come first, because 'none' is also
    # listed among the false spellings below.
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None
| 112 | |
| 113 | |
| 114 | """ |
| 115 | MISC AUXILIARY CLASSES |
| 116 | ============================================================================= |
| 117 | """ |
| 118 | |
| 119 | |
class AtomicString(text_type):
    """A string which should not be further processed.

    Adds no behaviour of its own on top of `text_type`; the subclass only
    serves as a marker.
    """
| 123 | |
| 124 | |
class Processor(object):
    """Base class that keeps a reference to a markdown instance."""

    def __init__(self, markdown_instance=None):
        """Store `markdown_instance` as `self.markdown` when one is given.

        When `markdown_instance` is None (the default), no `markdown`
        attribute is set on the instance.
        """
        # Compare against None rather than by truthiness, so that an
        # instance which happens to evaluate as false is still stored.
        if markdown_instance is not None:
            self.markdown = markdown_instance
| 129 | |
| 130 | |
class HtmlStash(object):
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks = []  # list of (html, safe) tuples
        self.tag_counter = 0  # number of entries added via store_tag
        self.tag_data = []  # list of dictionaries in the order tags appear

    def store(self, html, safe=False):
        """
        Saves an HTML segment for later reinsertion.  Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append((html, safe))
        # The placeholder carries the index of the segment just appended.
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self):
        """Discard all stashed HTML segments and tag data."""
        self.html_counter = 0
        self.rawHtmlBlocks = []
        # Clear the tag stash as well, so tag data does not keep
        # accumulating across resets.
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key):
        """Return the HTML placeholder string for `key`."""
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag, attrs, left_index, right_index):
        """Store tag data and return a placeholder."""
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in self.tag_data
        return placeholder