blob: d3d48f099940521431520bbc42eb466fd099e1fb [file] [log] [blame]
dprankeb08af212015-10-06 17:44:36 -07001# -*- coding: utf-8 -*-
2from __future__ import unicode_literals
3import re
4import sys
5
6
7"""
8Python 3 Stuff
9=============================================================================
10"""
# True only when the interpreter's major version is exactly 3.
PY3 = sys.version_info[0] == 3

# Aliases for the string base type, the text type and the
# "code point -> character" builtin, chosen by interpreter major version.
if not PY3:  # pragma: no cover
    string_type = basestring  # noqa
    text_type = unicode  # noqa
    int2str = unichr  # noqa
else:  # pragma: no cover
    string_type = str
    text_type = str
    int2str = chr
21
22
23"""
24Constants you might want to modify
25-----------------------------------------------------------------------------
26"""
27
28
# Case-insensitive pattern that matches a complete tag name which is treated
# as a block level element. The pieces below concatenate into one
# "^(a|b|...)$" alternation.
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^("
    "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|"
    "script|noscript|form|fieldset|iframe|math|"
    "hr|hr/|style|li|dt|dd|thead|tbody|"
    "tr|th|td|section|footer|header|group|figure|"
    "figcaption|aside|article|canvas|output|"
    "progress|video|nav"
    ")$",
    re.IGNORECASE
)
38# Placeholders
STX = '\u0002'  # "Start of text" control character: opens every placeholder
ETX = '\u0003'  # "End of text" control character: closes every placeholder

# Inline placeholders: STX + "klzzwxh:" + <id> + ETX
INLINE_PLACEHOLDER_PREFIX = "".join((STX, "klzzwxh:"))
INLINE_PLACEHOLDER = "".join((INLINE_PLACEHOLDER_PREFIX, "%s", ETX))
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')

# Stand-in for an ampersand.
AMP_SUBSTITUTE = "".join((STX, "amp", ETX))

# Raw HTML placeholders: STX + "wzxhzdk:" + <id> + ETX
HTML_PLACEHOLDER = "".join((STX, "wzxhzdk:%s", ETX))
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')

# Tag placeholders: STX + "hzzhzkh:" + <id> + ETX
TAG_PLACEHOLDER = "".join((STX, "hzzhzkh:%s", ETX))
48
49
50"""
51Constants you probably do not need to change
52-----------------------------------------------------------------------------
53"""
54
# Inclusive (first, last) character pairs.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF), Syriac (0700-074F),
    # Arabic supplement (0750-077F), Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)
62
# Extensions should use "markdown.util.etree" instead of "etree" (or do
# `from markdown.util import etree`). Do not import ElementTree yourself.
65
try:  # pragma: no cover
    # First choice: the C implementation of ElementTree, if it can be imported.
    import xml.etree.cElementTree as etree
    from xml.etree.ElementTree import Comment
    # Serializers (including ours) test against the non-C Comment, so make
    # it reachable from the C module.
    etree.test_comment = Comment
    # A too-old C module raises here and is handled like a missing one.
    if not etree.VERSION >= "1.0.5":
        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):  # pragma: no cover
    # Second choice: the Python implementation of ElementTree.
    import xml.etree.ElementTree as etree
    if not etree.VERSION >= "1.1":
        raise RuntimeError("ElementTree version 1.1 or higher is required")
79
80
81"""
82AUXILIARY GLOBAL FUNCTIONS
83=============================================================================
84"""
85
86
def isBlockLevel(tag):
    """Check whether `tag` names a block level HTML element.

    For a string tag the result of matching it against
    BLOCK_LEVEL_ELEMENTS is returned (a match object, or None when the tag
    does not match). For any non-string tag the result is False.
    """
    if not isinstance(tag, string_type):
        # Some ElementTree tags are not strings; those are never block level.
        return False
    return BLOCK_LEVEL_ELEMENTS.match(tag)
93
94
def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
    """Interpret `value` as a boolean.

    Non-string values are converted with bool(), except that None is passed
    through unchanged when preserve_none=True.

    String values are compared case-insensitively:

    * 'true', 'yes', 'y', 'on', '1' give True
    * 'false', 'no', 'n', 'off', '0', 'none' give False
    * 'none' gives None instead of False when preserve_none=True

    Any other string raises ValueError, or returns None when
    fail_on_errors=False.
    """
    if not isinstance(value, string_type):
        if value is None and preserve_none:
            return None
        return bool(value)

    lowered = value.lower()
    # Must be tested before the false-words below, which also contain 'none'.
    if lowered == 'none' and preserve_none:
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None
112
113
114"""
115MISC AUXILIARY CLASSES
116=============================================================================
117"""
118
119
class AtomicString(text_type):
    """Text type marking a string which should not be further processed."""
123
124
class Processor(object):
    """Base object that optionally remembers a markdown instance."""

    def __init__(self, markdown_instance=None):
        """Store `markdown_instance` as `self.markdown` when it is truthy.

        When the argument is omitted (or otherwise falsy) no `markdown`
        attribute is created on the instance.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
129
130
class HtmlStash(object):
    """
    Keeps the HTML objects that are extracted at the beginning of
    processing and replaced with place-holders, together with data
    about stored tags.
    """

    def __init__(self):
        """ Start with an empty stash. """
        self.html_counter = 0  # number of html segments stored so far
        self.rawHtmlBlocks = []  # (html, safe) tuples, in storage order
        self.tag_counter = 0  # number of tags stored so far
        self.tag_data = []  # one dictionary per tag, in the order they appear

    def store(self, html, safe=False):
        """
        Remember an HTML segment so it can be reinserted later.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : the placeholder string to insert into the document
        in place of the segment

        """
        self.rawHtmlBlocks.append((html, safe))
        # The placeholder is numbered with the counter value *before* it
        # is advanced.
        token = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return token

    def reset(self):
        """ Forget the stored HTML segments; tag data is left as it is. """
        self.html_counter = 0
        self.rawHtmlBlocks = []

    def get_placeholder(self, key):
        """ Return the HTML placeholder string for `key`. """
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag, attrs, left_index, right_index):
        """Record data about a tag and return the placeholder for it."""
        entry = {
            'tag': tag,
            'attrs': attrs,
            'left_index': left_index,
            'right_index': right_index,
        }
        self.tag_data.append(entry)
        # The counter value used here equals the entry's index in
        # self.tag_data.
        token = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1
        return token