dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 1 | """ |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 2 | Python Markdown |
| 3 | |
| 4 | A Python implementation of John Gruber's Markdown. |
| 5 | |
| 6 | Documentation: https://python-markdown.github.io/ |
| 7 | GitHub: https://github.com/Python-Markdown/markdown/ |
| 8 | PyPI: https://pypi.org/project/Markdown/ |
| 9 | |
| 10 | Started by Manfred Stienstra (http://www.dwerg.net/). |
| 11 | Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). |
| 12 | Currently maintained by Waylan Limberg (https://github.com/waylan), |
| 13 | Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). |
| 14 | |
| 15 | Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) |
| 16 | Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) |
| 17 | Copyright 2004 Manfred Stienstra (the original version) |
| 18 | |
| 19 | License: BSD (see LICENSE.md for details). |
| 20 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 21 | INLINE PATTERNS |
| 22 | ============================================================================= |
| 23 | |
| 24 | Inline patterns such as *emphasis* are handled by means of auxiliary |
| 25 | objects, one per pattern. Pattern objects must be instances of classes |
| 26 | that extend markdown.Pattern. Each pattern object uses a single regular |
| 27 | expression and needs to support the following methods: |
| 28 | |
| 29 | pattern.getCompiledRegExp() # returns a regular expression |
| 30 | |
| 31 | pattern.handleMatch(m) # takes a match object and returns |
| 32 | # an ElementTree element or just plain text |
| 33 | |
| 34 | All of python markdown's built-in patterns subclass from Pattern, |
| 35 | but you can add additional patterns that don't. |
| 36 | |
| 37 | Also note that all the regular expressions used by inline must |
| 38 | capture the whole block. For this reason, they all start with |
| 39 | '^(.*)' and end with '(.*)$'. For the built-in expressions, |
| 40 | Pattern takes care of adding the "^(.*)" and "(.*)$". |
| 41 | |
| 42 | Finally, the order in which regular expressions are applied is very |
| 43 | important - e.g. if we first replace http://.../ links with <a> tags |
| 44 | and _then_ try to replace inline html, we would end up with a mess. |
| 45 | So, we apply the expressions in the following order: |
| 46 | |
| 47 | * escape and backticks have to go before everything else, so |
| 48 | that we can preempt any markdown patterns by escaping them. |
| 49 | |
| 50 | * then we handle auto-links (must be done before inline html) |
| 51 | |
| 52 | * then we handle inline HTML. At this point we will simply |
| 53 | replace all inline HTML strings with a placeholder and add |
| 54 | the actual HTML to a hash. |
| 55 | |
| 56 | * then inline images (must be done before links) |
| 57 | |
| 58 | * then bracketed links, first regular then reference-style |
| 59 | |
| 60 | * finally we apply strong and emphasis |
| 61 | """ |
| 62 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 63 | from . import util |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 64 | from collections import namedtuple |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 65 | import re |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 66 | import xml.etree.ElementTree as etree |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 67 | try: # pragma: no cover |
| 68 | from html import entities |
| 69 | except ImportError: # pragma: no cover |
| 70 | import htmlentitydefs as entities |
| 71 | |
| 72 | |
def build_inlinepatterns(md, **kwargs):
    """Build the default set of inline patterns for Markdown.

    Each built-in processor is registered in a fresh `util.Registry` under
    a name and a numeric priority, listed below from 190 down to 50.

    Keyword arguments:

    * md: The Markdown instance passed to the processors that take one.
    * kwargs: Accepted for call compatibility; not used here.

    Returns the populated registry.
    """
    registry = util.Registry()
    # (processor, registry name, priority) in registration order.
    defaults = (
        (BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190),
        (EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180),
        (ReferenceInlineProcessor(REFERENCE_RE, md), 'reference', 170),
        (LinkInlineProcessor(LINK_RE, md), 'link', 160),
        (ImageInlineProcessor(IMAGE_LINK_RE, md), 'image_link', 150),
        (ImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'image_reference', 140),
        (ShortReferenceInlineProcessor(REFERENCE_RE, md), 'short_reference', 130),
        (ShortImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'short_image_ref', 125),
        (AutolinkInlineProcessor(AUTOLINK_RE, md), 'autolink', 120),
        (AutomailInlineProcessor(AUTOMAIL_RE, md), 'automail', 110),
        (SubstituteTagInlineProcessor(LINE_BREAK_RE, 'br'), 'linebreak', 100),
        (HtmlInlineProcessor(HTML_RE, md), 'html', 90),
        (HtmlInlineProcessor(ENTITY_RE, md), 'entity', 80),
        (SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 70),
        (AsteriskProcessor(r'\*'), 'em_strong', 60),
        (UnderscoreProcessor(r'_'), 'em_strong2', 50),
    )
    for processor, name, priority in defaults:
        registry.register(processor, name, priority)
    return registry
| 99 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 100 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 101 | """ |
| 102 | The actual regular expressions for patterns |
| 103 | ----------------------------------------------------------------------------- |
| 104 | """ |
| 105 | |
# Negative lookbehind: the match must not be preceded by `!`.
# Prepended to LINK_RE so a link opener does not match image syntax.
NOIMG = r'(?<!\!)'

# `e=f()` or ``e=f("`")``
BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\2(?!`))'

# \<
ESCAPE_RE = r'\\(.)'

# *emphasis*
EMPHASIS_RE = r'(\*)([^\*]+)\1'

# **strong**
STRONG_RE = r'(\*{2})(.+?)\1'

# __smart__strong__
SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\1(?!\w)'

# _smart_emphasis_
SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)'

# __strong _em__
SMART_STRONG_EM_RE = r'(?<!\w)(\_)\1(?!\1)(.+?)(?<!\w)\1(?!\1)(.+?)\1{3}(?!\w)'

# ***strongem*** or ***em*strong**
EM_STRONG_RE = r'(\*)\1{2}(.+?)\1(.*?)\1{2}'

# ___strongem___ or ___em_strong__
EM_STRONG2_RE = r'(_)\1{2}(.+?)\1(.*?)\1{2}'

# ***strong**em*
STRONG_EM_RE = r'(\*)\1{2}(.+?)\1{2}(.*?)\1'

# ___strong__em_
STRONG_EM2_RE = r'(_)\1{2}(.+?)\1{2}(.*?)\1'

# **strong*em***
STRONG_EM3_RE = r'(\*)\1(?!\1)([^*]+?)\1(?!\1)(.+?)\1{3}'

# [text](url) or [text](<url>) or [text](url "title")
# Only the opening `[` is matched here.
LINK_RE = NOIMG + r'\['

# ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
# Only the opening `![` is matched here.
IMAGE_LINK_RE = r'\!\['

# [Google][3]
REFERENCE_RE = LINK_RE

# ![alt text][2]
IMAGE_REFERENCE_RE = IMAGE_LINK_RE

# stand-alone * or _
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'

# <http://www.123.com>
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'

# <me@example.com>
AUTOMAIL_RE = r'<([^<> !]*@[^@<> ]*)>'

# <...>
HTML_RE = r'(<([a-zA-Z/][^<>]*|!--(?:(?!<!--|-->).)*--)>)'

# "&#38;" (decimal) or "&#x26;" (hex) or "&amp;" (named)
ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)'

# two spaces at end of line
LINE_BREAK_RE = r'  \n'
| 173 | |
| 174 | |
def dequote(string):
    """Remove one pair of matching quotes from around a string.

    The string is unwrapped only when it is at least two characters long
    and its first and last characters are the same quote character,
    either `"` or `'`. Anything else is returned unchanged. In particular
    a lone quote character is left alone: it is a single character, not
    an opening/closing pair, so it must not collapse to an empty string.

    Keyword arguments:

    * string: The text to strip surrounding quotes from.

    Returns the unquoted text, or `string` itself if it is not quoted.
    """
    if len(string) >= 2 and string[0] == string[-1] and string[0] in ('"', "'"):
        return string[1:-1]
    return string
| 182 | |
| 183 | |
class EmStrongItem(namedtuple('EmStrongItem', ('pattern', 'builder', 'tags'))):
    """One emphasis/strong rule.

    Fields:

    * pattern: The compiled regular expression to try.
    * builder: Name of the builder to use ('single', 'double' or 'double2').
    * tags: Tag name, or two comma-separated tag names for the double builders.
    """
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 186 | |
| 187 | |
| 188 | """ |
| 189 | The pattern classes |
| 190 | ----------------------------------------------------------------------------- |
| 191 | """ |
| 192 | |
| 193 | |
class Pattern:  # pragma: no cover
    """Base class that inline patterns subclass.

    The supplied expression is wrapped as `^(.*?)<pattern>(.*)$` before it
    is compiled, so a match always covers the whole text and the groups of
    the caller's own expression start at group 2.
    """

    ANCESTOR_EXCLUDES = ()

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline pattern.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: Optional Markdown instance, kept as `self.md`

        """
        self.pattern = pattern
        wrapped = r"^(.*?)%s(.*)$" % pattern
        self.compiled_re = re.compile(wrapped, re.DOTALL | re.UNICODE)

        self.md = md

    @property
    @util.deprecated("Use 'md' instead.")
    def markdown(self):
        # TODO: remove this later
        return self.md

    def getCompiledRegExp(self):
        """ Return a compiled regular expression. """
        return self.compiled_re

    def handleMatch(self, m):
        """Return a ElementTree element from the given match.

        Subclasses should override this method.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.

        """
        pass  # pragma: no cover

    def type(self):
        """ Return class name, to define pattern type """
        return type(self).__name__

    def unescape(self, text):
        """ Return unescaped text given text with an inline placeholder.

        Placeholders are looked up in the stashed nodes of the 'inline'
        tree processor. A stashed string is substituted as is; any other
        stashed value is replaced by its concatenated text content. When
        no 'inline' tree processor is registered, `text` is returned
        unchanged.
        """
        try:
            stash = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def get_stash(m):
            key = m.group(1)
            if key not in stash:
                # Unknown placeholder id: no replacement text is produced.
                return None
            value = stash.get(key)
            # Anything that is not a string is treated as an etree
            # Element - return its text content only.
            return value if isinstance(value, str) else ''.join(value.itertext())

        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
| 257 | |
| 258 | |
class InlineProcessor(Pattern):
    """
    Base class that inline patterns subclass.

    This is the newer style inline processor that uses a more
    efficient and flexible search approach. Unlike `Pattern`, the
    expression is compiled exactly as given, without any wrapping.
    """

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline pattern.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: Optional Markdown instance, kept as `self.md`

        """
        flags = re.DOTALL | re.UNICODE
        self.pattern = pattern
        self.compiled_re = re.compile(pattern, flags)

        # Api for Markdown to pass safe_mode into instance
        self.safe_mode = False
        self.md = md

    def handleMatch(self, m, data):
        """Return a ElementTree element from the given match and the
        start and end index of the matched text.

        If `start` and/or `end` are returned as `None`, it will be
        assumed that the processor did not find a valid region of text.

        Subclasses should override this method.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.
        * data: The buffer currently under analysis

        Returns:

        * el: The ElementTree element, text or None.
        * start: The start of the region that has been matched or None.
        * end: The end of the region that has been matched or None.

        """
        pass  # pragma: no cover
| 305 | |
| 306 | |
class SimpleTextPattern(Pattern):  # pragma: no cover
    """ Older-style pattern that yields the plain text of group(2). """

    def handleMatch(self, m):
        """ Return the text captured by group(2) of match `m`. """
        captured = m.group(2)
        return captured
| 311 | |
| 312 | |
class SimpleTextInlineProcessor(InlineProcessor):
    """ Inline processor that yields the plain text of group(1). """

    def handleMatch(self, m, data):
        """ Return group(1) of `m` plus the start and end of the whole match. """
        begin = m.start(0)
        finish = m.end(0)
        return m.group(1), begin, finish
| 317 | |
| 318 | |
class EscapeInlineProcessor(InlineProcessor):
    """ Return an escaped character. """

    def handleMatch(self, m, data):
        """Replace a backslash-escaped character by a placeholder.

        When the character after the backslash is listed in
        `self.md.ESCAPED_CHARS`, the result is its code point wrapped in
        `util.STX` / `util.ETX`. Otherwise `None` is returned in place of
        the text. The span of the whole match is returned in both cases.
        """
        escaped = m.group(1)
        if escaped not in self.md.ESCAPED_CHARS:
            return None, m.start(0), m.end(0)
        placeholder = '{}{}{}'.format(util.STX, ord(escaped), util.ETX)
        return placeholder, m.start(0), m.end(0)
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 328 | |
| 329 | |
class SimpleTagPattern(Pattern):  # pragma: no cover
    """
    Older-style pattern producing an element of type `tag` whose text
    is group(3) of the match.

    """
    def __init__(self, pattern, tag):
        """ Compile `pattern` via `Pattern` and remember the tag name. """
        Pattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m):
        """ Return a new `self.tag` element holding the text of group(3). """
        element = etree.Element(self.tag)
        element.text = m.group(3)
        return element
| 344 | |
| 345 | |
class SimpleTagInlineProcessor(InlineProcessor):
    """
    Inline processor producing an element of type `tag` whose text
    is group(2) of the match.

    """
    def __init__(self, pattern, tag):
        """ Compile `pattern` via `InlineProcessor` and remember the tag name. """
        InlineProcessor.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m, data):  # pragma: no cover
        """ Return a `self.tag` element with group(2) as text, and the match span. """
        element = etree.Element(self.tag)
        element.text = m.group(2)
        return element, m.start(0), m.end(0)
| 360 | |
| 361 | |
class SubstituteTagPattern(SimpleTagPattern):  # pragma: no cover
    """ Older-style pattern that replaces the match by an empty `tag` element. """

    def handleMatch(self, m):
        """ Return a new `self.tag` element with no text and no children. """
        empty = etree.Element(self.tag)
        return empty
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 366 | |
| 367 | |
class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
    """ Inline processor that replaces the match by an empty `tag` element. """

    def handleMatch(self, m, data):
        """ Return a childless `self.tag` element and the span of the match. """
        empty = etree.Element(self.tag)
        return empty, m.start(0), m.end(0)
| 372 | |
| 373 | |
class BacktickInlineProcessor(InlineProcessor):
    """ Return a `<code>` element containing the matching text. """

    def __init__(self, pattern):
        """ Compile `pattern` and prepare the escaped-backslash placeholder. """
        InlineProcessor.__init__(self, pattern)
        self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, ord('\\'), util.ETX)
        self.tag = 'code'

    def handleMatch(self, m, data):
        """Build the code element, or escape a run of backslashes.

        When group(3) matched, a `self.tag` element is returned whose text
        is the stripped, code-escaped content wrapped in
        `util.AtomicString`. Otherwise group(1) is returned with every
        double backslash replaced by `self.ESCAPED_BSLASH`. The span of
        the whole match is returned either way.
        """
        code = m.group(3)
        if not code:
            escaped = m.group(1).replace('\\\\', self.ESCAPED_BSLASH)
            return escaped, m.start(0), m.end(0)

        element = etree.Element(self.tag)
        element.text = util.AtomicString(util.code_escape(code.strip()))
        return element, m.start(0), m.end(0)
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 388 | |
| 389 | |
class DoubleTagPattern(SimpleTagPattern):  # pragma: no cover
    """Return a ElementTree element nested in tag2 nested in tag1.

    `self.tag` holds both tag names separated by a comma.
    Useful for strong emphasis etc.

    """
    def handleMatch(self, m):
        """Return the outer element with the inner one as its only child.

        The inner element's text is group(3). When the match has exactly
        five groups, group(4) becomes the inner element's tail.
        """
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(3)
        has_tail_group = len(m.groups()) == 5
        if has_tail_group:
            inner.tail = m.group(4)
        return outer
| 404 | |
| 405 | |
class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
    """Return a ElementTree element nested in tag2 nested in tag1.

    `self.tag` holds both tag names separated by a comma.
    Useful for strong emphasis etc.

    """
    def handleMatch(self, m, data):  # pragma: no cover
        """Return the outer element and the span of the match.

        The inner element's text is group(2). When the match has exactly
        three groups, group(3) becomes the inner element's tail.
        """
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(2)
        has_tail_group = len(m.groups()) == 3
        if has_tail_group:
            inner.tail = m.group(3)
        return outer, m.start(0), m.end(0)
| 420 | |
| 421 | |
class HtmlInlineProcessor(InlineProcessor):
    """ Store raw inline html and return a placeholder. """

    def handleMatch(self, m, data):
        """ Stash the unescaped group(1) and return its placeholder and the match span. """
        raw = self.unescape(m.group(1))
        return self.md.htmlStash.store(raw), m.start(0), m.end(0)

    def unescape(self, text):
        """ Return unescaped text given text with an inline placeholder.

        Each placeholder found in the stashed nodes of the 'inline' tree
        processor is replaced by `self.md.serializer(value)`. If the
        serializer raises, the value is substituted with a backslash in
        front of it instead. When no 'inline' tree processor is
        registered, `text` is returned unchanged.
        """
        try:
            stash = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def get_stash(m):
            value = stash.get(m.group(1))
            if value is None:
                # Nothing stashed under this id: no replacement text is produced.
                return None
            try:
                return self.md.serializer(value)
            except Exception:
                # Deliberate best effort: fall back to the value itself.
                return r'\%s' % value

        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
| 446 | |
| 447 | |
class AsteriskProcessor(InlineProcessor):
    """Emphasis processor for handling strong and em matches inside asterisks."""

    # Tried in this order. `handleMatch` uses the first item that matches;
    # nested parsing only tries items listed after the one used on the parent.
    PATTERNS = [
        EmStrongItem(re.compile(EM_STRONG_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
        EmStrongItem(re.compile(STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
        EmStrongItem(re.compile(STRONG_EM3_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
        EmStrongItem(re.compile(STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'),
        EmStrongItem(re.compile(EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em')
    ]

    def build_single(self, m, tag, idx):
        """Return single tag.

        The content of group(2) is parsed for nested patterns and
        attached to the new `tag` element.
        """
        element = etree.Element(tag)
        self.parse_sub_patterns(m.group(2), element, None, idx)
        return element

    def build_double(self, m, tags, idx):
        """Return double tag.

        `tags` is "outer,inner". Group(2) is parsed into the inner
        element. When the match has exactly three groups, group(3) is
        parsed into the outer element after the inner one.
        """
        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        self.parse_sub_patterns(m.group(2), inner, None, idx)
        outer.append(inner)
        if len(m.groups()) == 3:
            self.parse_sub_patterns(m.group(3), outer, inner, idx)
        return outer

    def build_double2(self, m, tags, idx):
        """Return double tags (variant 2): `<strong>text <em>text</em></strong>`.

        `tags` is "outer,inner". Group(2) is parsed into the outer
        element first; the inner element is then appended and group(3) is
        parsed into it.
        """
        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        self.parse_sub_patterns(m.group(2), outer, None, idx)
        outer.append(inner)
        self.parse_sub_patterns(m.group(3), inner, None, idx)
        return outer

    def parse_sub_patterns(self, data, parent, last, idx):
        """
        Parses sub patterns.

        `data` (`str`):
            text to evaluate.

        `parent` (`etree.Element`):
            Parent to attach text and sub elements to.

        `last` (`etree.Element`):
            Last appended child to parent. Can also be None if parent has no children.

        `idx` (`int`):
            Current pattern index that was used to evaluate the parent.

        """

        consumed = 0  # end of the text already attached to the tree
        cursor = 0    # position currently being examined
        total = len(data)

        while cursor < total:
            hit = False
            # Only look for emphasis/strong where a potential token starts
            if self.compiled_re.match(data, cursor):
                for index, item in enumerate(self.PATTERNS):
                    # Only evaluate patterns that are after what was used on the parent
                    if index <= idx:
                        continue
                    found = item.pattern.match(data, cursor)
                    if not found:
                        continue
                    # Text in front of the match goes on the tail of the
                    # last child if there is one, else on the parent's text.
                    leading = data[consumed:found.start(0)]
                    if leading:
                        if last is None:
                            parent.text = leading
                        else:
                            last.tail = leading
                    child = self.build_element(found, item.builder, item.tags, index)
                    parent.append(child)
                    last = child
                    # Move past the matched hunk. The remaining patterns
                    # are still tried from the new position.
                    consumed = cursor = found.end(0)
                    hit = True
            if not hit:
                # Nothing consumed at this position, step one character
                cursor += 1

        # Append any leftover text as a text node.
        trailing = data[consumed:]
        if trailing:
            if last is None:
                parent.text = trailing
            else:
                last.tail = trailing

    def build_element(self, m, builder, tags, index):
        """Element builder.

        Dispatches on `builder`: 'double2' and 'double' use the matching
        double builders, anything else uses `build_single`.
        """
        if builder == 'double2':
            return self.build_double2(m, tags, index)
        if builder == 'double':
            return self.build_double(m, tags, index)
        return self.build_single(m, tags, index)

    def handleMatch(self, m, data):
        """Parse patterns.

        Each item of `PATTERNS` is tried at the start of `m`. The first
        one that matches yields the built element and the span of that
        match. `(None, None, None)` is returned when none matches.
        """
        origin = m.start(0)
        for index, item in enumerate(self.PATTERNS):
            found = item.pattern.match(data, origin)
            if found:
                element = self.build_element(found, item.builder, item.tags, index)
                return element, found.start(0), found.end(0)
        return None, None, None
| 582 | |
| 583 | |
class UnderscoreProcessor(AsteriskProcessor):
    """Emphasis processor for handling strong and em matches inside underscores."""

    # Same order and builders as the asterisk rules, with the underscore
    # expressions; each one is compiled with DOTALL | UNICODE.
    PATTERNS = [
        EmStrongItem(re.compile(expression, re.DOTALL | re.UNICODE), builder, tags)
        for expression, builder, tags in (
            (EM_STRONG2_RE, 'double', 'strong,em'),
            (STRONG_EM2_RE, 'double', 'em,strong'),
            (SMART_STRONG_EM_RE, 'double2', 'strong,em'),
            (SMART_STRONG_RE, 'single', 'strong'),
            (SMART_EMPHASIS_RE, 'single', 'em'),
        )
    ]
| 594 | |
| 595 | |
| 596 | class LinkInlineProcessor(InlineProcessor): |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 597 | """ Return a link element from the given match. """ |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 598 | RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE) |
| 599 | RE_TITLE_CLEAN = re.compile(r'\s') |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 600 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 601 | def handleMatch(self, m, data): |
| 602 | text, index, handled = self.getText(data, m.end(0)) |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 603 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 604 | if not handled: |
| 605 | return None, None, None |
| 606 | |
| 607 | href, title, index, handled = self.getLink(data, index) |
| 608 | if not handled: |
| 609 | return None, None, None |
| 610 | |
| 611 | el = etree.Element("a") |
| 612 | el.text = text |
| 613 | |
| 614 | el.set("href", href) |
| 615 | |
| 616 | if title is not None: |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 617 | el.set("title", title) |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 618 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 619 | return el, m.start(0), index |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 620 | |
def getLink(self, data, index):
    """Parse data between `()` of `[Text]()` allowing recursive `()`.

    Arguments:
        data: The text being scanned.
        index: Position in `data` at which `self.RE_LINK` is matched.

    Returns:
        A 4-tuple `(href, title, index, handled)`:

        * `href`: the link destination, unescaped and stripped (`''` when
          nothing was parsed).
        * `title`: the cleaned-up title, or `None` when no title was found.
        * `index`: where scanning stopped; on success this is the position
          just past the closing `)`.
        * `handled`: `True` when a complete link was parsed.
    """

    href = ''
    title = None
    handled = False

    m = self.RE_LINK.match(data, pos=index)
    if m and m.group(1):
        # Matches [Text](<link> "title")
        # Drop the first and last character (the delimiters) of each group.
        href = m.group(1)[1:-1].strip()
        if m.group(2):
            title = m.group(2)[1:-1]
        index = m.end(0)
        handled = True
    elif m:
        # Track bracket nesting and index in string
        bracket_count = 1
        backtrack_count = 1
        start_index = m.end()
        index = start_index
        last_bracket = -1

        # Primary (first found) quote tracking.
        quote = None
        start_quote = -1
        exit_quote = -1
        ignore_matches = False

        # Secondary (second found) quote tracking.
        alt_quote = None
        start_alt_quote = -1
        exit_alt_quote = -1

        # Track last character
        last = ''

        end = len(data)
        while index < end:
            c = data[index]
            if c == '(':
                # Count nested (
                # Don't increment the bracket count if we are sure we're in a title.
                if not ignore_matches:
                    bracket_count += 1
                elif backtrack_count > 0:
                    backtrack_count -= 1
            elif c == ')':
                # Match nested ) to (
                # Don't decrement if we are sure we are in a title that is unclosed.
                if ((exit_quote != -1 and quote == last) or (exit_alt_quote != -1 and alt_quote == last)):
                    # A quote was closed right before this bracket: the link ends here.
                    bracket_count = 0
                elif not ignore_matches:
                    bracket_count -= 1
                elif backtrack_count > 0:
                    backtrack_count -= 1
                    # We've found our backup end location if the title doesn't resolve.
                    if backtrack_count == 0:
                        last_bracket = index + 1

            elif c in ("'", '"'):
                # Quote has started
                if not quote:
                    # We'll assume we are now in a title.
                    # Brackets are quoted, so no need to match them (except for the final one).
                    ignore_matches = True
                    backtrack_count = bracket_count
                    bracket_count = 1
                    start_quote = index + 1
                    quote = c
                # Secondary quote (in case the first doesn't resolve): [text](link'"title")
                elif c != quote and not alt_quote:
                    start_alt_quote = index + 1
                    alt_quote = c
                # Update primary quote match
                elif c == quote:
                    exit_quote = index + 1
                # Update secondary quote match
                elif alt_quote and c == alt_quote:
                    exit_alt_quote = index + 1

            # From here on `index` points just past the character `c`.
            index += 1

            # Link is closed, so let's break out of the loop
            if bracket_count == 0:
                # Get the title if we closed a title string right before link closed
                if exit_quote >= 0 and quote == last:
                    href = data[start_index:start_quote - 1]
                    title = data[start_quote:exit_quote - 1]
                elif exit_alt_quote >= 0 and alt_quote == last:
                    href = data[start_index:start_alt_quote - 1]
                    title = data[start_alt_quote:exit_alt_quote - 1]
                else:
                    href = data[start_index:index - 1]
                break

            # Remember the last non-space character seen.
            if c != ' ':
                last = c

        # We have a scenario: [test](link"notitle)
        # When we enter a string, we stop tracking bracket resolution in the main counter,
        # but we do keep a backup counter up until we discover where we might resolve all brackets
        # if the title string fails to resolve.
        if bracket_count != 0 and backtrack_count == 0:
            href = data[start_index:last_bracket - 1]
            index = last_bracket
            bracket_count = 0

        handled = bracket_count == 0

    if title is not None:
        title = self.RE_TITLE_CLEAN.sub(' ', dequote(self.unescape(title.strip())))

    href = self.unescape(href).strip()

    return href, title, index, handled
| 736 | |
def getText(self, data, index):
    """Collect the text inside `[...]` at the start of an image or link.

    Scanning starts at `index`, which is treated as being just inside an
    already opened `[`. Nested square brackets are balanced and kept in the
    returned text.

    Returns a 3-tuple `(text, index, handled)`: the collected text, the
    position just past the last character consumed, and whether the
    closing `]` was found.
    """
    depth = 1
    collected = []
    size = len(data)
    while index < size:
        char = data[index]
        index += 1
        if char == '[':
            depth += 1
        elif char == ']':
            depth -= 1
            if depth == 0:
                # The matching close bracket is consumed but not kept.
                break
        collected.append(char)
    return ''.join(collected), index, depth == 0
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 755 | |
| 756 | |
class ImageInlineProcessor(LinkInlineProcessor):
    """ Return an `img` element from the given match. """

    def handleMatch(self, m, data):
        """Build an `img` element from the bracketed text and link that follow `m`.

        Returns `(element, start, end)`, or `(None, None, None)` when either
        the bracketed text or the link part cannot be parsed.
        """
        alt, pos, ok = self.getText(data, m.end(0))
        if ok:
            src, title, pos, ok = self.getLink(data, pos)
        if not ok:
            return None, None, None

        img = etree.Element("img")
        img.set("src", src)
        if title is not None:
            img.set("title", title)
        img.set('alt', self.unescape(alt))

        return img, m.start(0), pos
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 778 | |
| 779 | |
class ReferenceInlineProcessor(LinkInlineProcessor):
    """ Match to a stored reference and return link element. """
    NEWLINE_CLEANUP_RE = re.compile(r'\s+', re.MULTILINE)

    RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)

    def handleMatch(self, m, data):
        """Resolve `[text][id]` against `self.md.references`.

        Returns `(element, start, end)`. The element is `None` when the id
        is not a known reference, and all three values are `None` when the
        text or the id part cannot be parsed.
        """
        text, index, handled = self.getText(data, m.end(0))
        if not handled:
            return None, None, None

        ref_id, end, handled = self.evalId(data, index, text)
        if not handled:
            return None, None, None

        # Collapse every run of whitespace (line breaks included) in the id.
        ref_id = self.NEWLINE_CLEANUP_RE.sub(' ', ref_id)

        references = self.md.references
        if ref_id in references:
            href, title = references[ref_id]
            return self.makeTag(href, title, text), m.start(0), end

        # Undefined references are ignored.
        return None, m.start(0), end

    def evalId(self, data, index, text):
        """
        Evaluate the id portion of [ref][id].

        An empty id, as in [ref][], falls back to the text of [ref].
        Returns `(id, end, handled)`.
        """
        match = self.RE_LINK.match(data, pos=index)
        if match is None:
            return None, index, False

        ref_id = match.group(1).lower() or text.lower()
        return ref_id, match.end(0), True

    def makeTag(self, href, title, text):
        """Create the `a` element for a resolved reference."""
        anchor = etree.Element('a')
        anchor.set('href', href)
        if title:
            anchor.set('title', title)
        anchor.text = text
        return anchor
| 829 | |
| 830 | |
class ShortReferenceInlineProcessor(ReferenceInlineProcessor):
    """Short form of reference: [google]. """

    def evalId(self, data, index, text):
        """Use the lower-cased link text of [ref] as the id; consume nothing more."""
        ref_id = text.lower()
        return ref_id, index, True
| 837 | |
| 838 | |
class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
    """ Match to a stored reference and return img element. """

    def makeTag(self, href, title, text):
        """Create the `img` element for a resolved reference."""
        img = etree.Element("img")
        img.set("src", href)
        if title:
            img.set("title", title)
        alt = self.unescape(text)
        img.set("alt", alt)
        return img
| 848 | |
| 849 | |
class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor):
    """ Short form of image reference: ![ref]. """

    def evalId(self, data, index, text):
        """Use the lower-cased text of [ref] as the id; consume nothing more."""
        ref_id = text.lower()
        return ref_id, index, True
| 856 | |
| 857 | |
class AutolinkInlineProcessor(InlineProcessor):
    """ Return a link Element given an autolink (`<http://example/com>`). """

    def handleMatch(self, m, data):
        """Wrap the first captured group in an `a` element spanning the whole match."""
        target = m.group(1)

        link = etree.Element("a")
        link.set('href', self.unescape(target))
        link.text = util.AtomicString(target)

        return link, m.start(0), m.end(0)
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 865 | |
| 866 | |
class AutomailInlineProcessor(InlineProcessor):
    """
    Return a mailto link Element given an automail link (`<foo@example.com>`).
    """

    def handleMatch(self, m, data):
        """Build an `a` element whose text and href are written as character entities.

        Every character of the address is replaced by an entity reference
        that starts with `util.AMP_SUBSTITUTE`.
        """
        link = etree.Element('a')

        address = self.unescape(m.group(1))
        if address.startswith("mailto:"):
            # Strip the scheme; it is added back for the href below.
            address = address[len("mailto:"):]

        def as_entity(code):
            """Return the named entity for `code`, or a numeric one if unnamed."""
            name = entities.codepoint2name.get(code)
            if not name:
                return "%s#%d;" % (util.AMP_SUBSTITUTE, code)
            return "{}{};".format(util.AMP_SUBSTITUTE, name)

        # Visible text: named entities where they exist, numeric otherwise.
        encoded_text = ''.join(as_entity(ord(ch)) for ch in address)
        link.text = util.AtomicString(encoded_text)

        # The href always uses numeric entities, including the scheme.
        target = "mailto:" + address
        encoded_href = "".join(util.AMP_SUBSTITUTE + '#%d;' % ord(ch) for ch in target)
        link.set('href', encoded_href)

        return link, m.start(0), m.end(0)