dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 1 | """ |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 2 | Python Markdown |
| 3 | |
| 4 | A Python implementation of John Gruber's Markdown. |
| 5 | |
| 6 | Documentation: https://python-markdown.github.io/ |
| 7 | GitHub: https://github.com/Python-Markdown/markdown/ |
| 8 | PyPI: https://pypi.org/project/Markdown/ |
| 9 | |
| 10 | Started by Manfred Stienstra (http://www.dwerg.net/). |
| 11 | Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). |
| 12 | Currently maintained by Waylan Limberg (https://github.com/waylan), |
| 13 | Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). |
| 14 | |
| 15 | Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) |
| 16 | Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) |
| 17 | Copyright 2004 Manfred Stienstra (the original version) |
| 18 | |
| 19 | License: BSD (see LICENSE.md for details). |
| 20 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 21 | INLINE PATTERNS |
| 22 | ============================================================================= |
| 23 | |
| 24 | Inline patterns such as *emphasis* are handled by means of auxiliary |
| 25 | objects, one per pattern. Pattern objects must be instances of classes |
| 26 | that extend markdown.Pattern. Each pattern object uses a single regular |
expression and needs to support the following methods:
| 28 | |
| 29 | pattern.getCompiledRegExp() # returns a regular expression |
| 30 | |
| 31 | pattern.handleMatch(m) # takes a match object and returns |
| 32 | # an ElementTree element or just plain text |
| 33 | |
| 34 | All of python markdown's built-in patterns subclass from Pattern, |
| 35 | but you can add additional patterns that don't. |
| 36 | |
Also note that all the regular expressions used by inline patterns must
capture the whole block.  For this reason, they all start with
'^(.*?)' and end with '(.*)$'.  For the built-in expressions,
Pattern takes care of adding the "^(.*?)" and "(.*)$".
| 41 | |
| 42 | Finally, the order in which regular expressions are applied is very |
| 43 | important - e.g. if we first replace http://.../ links with <a> tags |
| 44 | and _then_ try to replace inline html, we would end up with a mess. |
| 45 | So, we apply the expressions in the following order: |
| 46 | |
| 47 | * escape and backticks have to go before everything else, so |
| 48 | that we can preempt any markdown patterns by escaping them. |
| 49 | |
| 50 | * then we handle auto-links (must be done before inline html) |
| 51 | |
| 52 | * then we handle inline HTML. At this point we will simply |
| 53 | replace all inline HTML strings with a placeholder and add |
| 54 | the actual HTML to a hash. |
| 55 | |
| 56 | * then inline images (must be done before links) |
| 57 | |
| 58 | * then bracketed links, first regular then reference-style |
| 59 | |
| 60 | * finally we apply strong and emphasis |
| 61 | """ |
| 62 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 63 | from . import util |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 64 | from collections import namedtuple |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 65 | import re |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 66 | import xml.etree.ElementTree as etree |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 67 | try: # pragma: no cover |
| 68 | from html import entities |
| 69 | except ImportError: # pragma: no cover |
| 70 | import htmlentitydefs as entities |
| 71 | |
| 72 | |
def build_inlinepatterns(md, **kwargs):
    """Create and populate the registry of built-in inline processors.

    Every processor is registered under a name together with a numeric
    priority (the third argument of `register`).  `md` is handed to the
    processors that need access to the Markdown instance; `kwargs` is
    accepted but not used here.

    Returns the populated `util.Registry`.
    """
    registry = util.Registry()
    add = registry.register

    # Code spans and backslash escapes.
    add(BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190)
    add(EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180)

    # Bracketed links and images (inline and reference style).
    add(ReferenceInlineProcessor(REFERENCE_RE, md), 'reference', 170)
    add(LinkInlineProcessor(LINK_RE, md), 'link', 160)
    add(ImageInlineProcessor(IMAGE_LINK_RE, md), 'image_link', 150)
    add(ImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'image_reference', 140)
    add(ShortReferenceInlineProcessor(REFERENCE_RE, md), 'short_reference', 130)
    add(ShortImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'short_image_ref', 125)

    # Angle-bracket auto links.
    add(AutolinkInlineProcessor(AUTOLINK_RE, md), 'autolink', 120)
    add(AutomailInlineProcessor(AUTOMAIL_RE, md), 'automail', 110)

    # Hard line breaks, raw HTML and entities.
    add(SubstituteTagInlineProcessor(LINE_BREAK_RE, 'br'), 'linebreak', 100)
    add(HtmlInlineProcessor(HTML_RE, md), 'html', 90)
    add(HtmlInlineProcessor(ENTITY_RE, md), 'entity', 80)

    # Emphasis and strong.
    add(SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 70)
    add(AsteriskProcessor(r'\*'), 'em_strong', 60)
    add(UnderscoreProcessor(r'_'), 'em_strong2', 50)

    return registry
| 99 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 100 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 101 | """ |
| 102 | The actual regular expressions for patterns |
| 103 | ----------------------------------------------------------------------------- |
| 104 | """ |
| 105 | |
# Negative lookbehind: the following token must not be preceded by `!`
# (used so that a link's `[` is not mistaken for an image's `![`).
NOIMG = r'(?<!\!)'

# `e=f()` or ``e=f("`")``
# First alternative: an even run of backslashes directly before backticks.
# Second alternative: a code span delimited by equal-length backtick runs.
BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\2(?!`))'

# \<
ESCAPE_RE = r'\\(.)'

# *emphasis*
EMPHASIS_RE = r'(\*)([^\*]+)\1'

# **strong**
STRONG_RE = r'(\*{2})(.+?)\1'

# __smart__strong__
SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\1(?!\w)'

# _smart_emphasis_
SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)'

# __strong _em__
SMART_STRONG_EM_RE = r'(?<!\w)(\_)\1(?!\1)(.+?)(?<!\w)\1(?!\1)(.+?)\1{3}(?!\w)'

# ***strongem*** or ***em*strong**
EM_STRONG_RE = r'(\*)\1{2}(.+?)\1(.*?)\1{2}'

# ___strongem___ or ___em_strong__
EM_STRONG2_RE = r'(_)\1{2}(.+?)\1(.*?)\1{2}'

# ***strong**em*
STRONG_EM_RE = r'(\*)\1{2}(.+?)\1{2}(.*?)\1'

# ___strong__em_
STRONG_EM2_RE = r'(_)\1{2}(.+?)\1{2}(.*?)\1'

# **strong*em***
STRONG_EM3_RE = r'(\*)\1(?!\1)([^*]+?)\1(?!\1)(.+?)\1{3}'

# [text](url) or [text](<url>) or [text](url "title")
# Only the opening bracket is matched here; the remainder of the link is
# parsed by the processor itself.
LINK_RE = NOIMG + r'\['

# ![alt text](url) or ![alt text](<url>)
# Only the opening `![` is matched here.
IMAGE_LINK_RE = r'\!\['

# [Google][3]
REFERENCE_RE = LINK_RE

# ![alt text][2]
IMAGE_REFERENCE_RE = IMAGE_LINK_RE

# stand-alone * or _
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'

# <http://www.123.com>
# Accepts the ftp, ftps, http and https schemes, case-insensitively.
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'

# <me@example.com>
AUTOMAIL_RE = r'<([^<> !]+@[^@<> ]+)>'

# <...>
# Either an opening/closing tag (optionally with attributes) or an HTML comment.
HTML_RE = r'(<(\/?[a-zA-Z][^<>@ ]*( [^<>]*)?|!--(?:(?!<!--|-->).)*--)>)'

# "&#38;" (decimal) or "&#x26;" (hex) or "&amp;" (named)
ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)'
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 170 | |
# Hard line break: two spaces at the end of a line, then the newline.
# Both spaces are required; a line ending in a single space must NOT
# produce a `<br>`.
LINE_BREAK_RE = r'  \n'
| 173 | |
| 174 | |
def dequote(string):
    """Remove one pair of matching quotes from around a string.

    The outer characters are stripped only when the string starts and ends
    with the same quote character (`"` or `'`) and is at least two characters
    long.  A lone quote character is therefore returned unchanged instead of
    being counted as both the opening and the closing quote.

    Returns the unquoted string, or `string` itself when it is not quoted.
    """
    if len(string) >= 2 and string[0] == string[-1] and string[0] in ('"', "'"):
        return string[1:-1]
    return string
| 182 | |
| 183 | |
class EmStrongItem(namedtuple('EmStrongItem', 'pattern builder tags')):
    """One entry of an emphasis/strong pattern table.

    Fields:

    * pattern: the compiled regular expression to try.
    * builder: name of the builder to use ('single', 'double' or 'double2').
    * tags: the tag name, or two comma separated tag names, to create.
    """
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 186 | |
| 187 | |
| 188 | """ |
| 189 | The pattern classes |
| 190 | ----------------------------------------------------------------------------- |
| 191 | """ |
| 192 | |
| 193 | |
class Pattern:  # pragma: no cover
    """Base class that inline patterns subclass.

    The supplied expression is wrapped as `^(.*?)<pattern>(.*)$`, so group 1
    is the text before the match, the pattern's own groups start at 2, and the
    last group is the text after the match.
    """

    # NOTE(review): presumably names of ancestor tags inside which the pattern
    # must not be applied -- confirm against the code that consumes it.
    ANCESTOR_EXCLUDES = tuple()

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline pattern.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: The Markdown instance the pattern belongs to (optional)

        """
        self.pattern = pattern
        self.compiled_re = re.compile(r"^(.*?)%s(.*)$" % pattern,
                                      re.DOTALL | re.UNICODE)

        self.md = md

    def getCompiledRegExp(self):
        """ Return a compiled regular expression. """
        return self.compiled_re

    def handleMatch(self, m):
        """Return a ElementTree element from the given match.

        Subclasses should override this method.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.

        """
        pass  # pragma: no cover

    def type(self):
        """ Return class name, to define pattern type """
        return self.__class__.__name__

    def unescape(self, text):
        """ Return unescaped text given text with an inline placeholder.

        Each inline placeholder found in `text` is replaced with the stashed
        value it refers to: stashed strings are used as-is, stashed elements
        contribute their text content only.

        `text` is returned unchanged when no Markdown instance is attached
        (`md` defaults to `None`) or when no 'inline' tree processor is
        registered.
        """
        # Without a Markdown instance there is no stash to resolve against.
        if self.md is None:
            return text
        try:
            stash = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def get_stash(m):
            key = m.group(1)
            if key in stash:
                value = stash.get(key)
                if isinstance(value, str):
                    return value
                else:
                    # An etree Element - return text content only
                    return ''.join(value.itertext())
            # NOTE(review): unknown ids fall through and return None; CPython's
            # re.sub appears to treat that as an empty replacement -- confirm.
        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
| 251 | |
| 252 | |
class InlineProcessor(Pattern):
    """
    Base class for the newer style of inline processor.

    Unlike `Pattern`, the expression is compiled exactly as given (no
    wrapping groups are added), and `handleMatch` reports the span of text
    it consumed.
    """

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline processor.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: The Markdown instance the processor belongs to (optional)

        """
        flags = re.DOTALL | re.UNICODE
        self.pattern = pattern
        self.compiled_re = re.compile(pattern, flags)
        self.md = md

        # Api for Markdown to pass safe_mode into instance
        self.safe_mode = False

    def handleMatch(self, m, data):
        """Build the result for a match and report the region it covers.

        Subclasses should override this method.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.
        * data: The buffer currently under analysis.

        Returns:

        * el: The ElementTree element, text or None.
        * start: The start of the region that has been matched or None.
        * end: The end of the region that has been matched or None.

        When `start` and/or `end` come back as `None`, the caller assumes
        that no valid region of text was found.

        """
        pass  # pragma: no cover
| 299 | |
| 300 | |
class SimpleTextPattern(Pattern):  # pragma: no cover
    """ Legacy pattern that yields the plain text captured by group(2). """

    def handleMatch(self, m):
        # group(1) is the leading text added by the Pattern wrapper.
        captured = m.group(2)
        return captured
| 305 | |
| 306 | |
class SimpleTextInlineProcessor(InlineProcessor):
    """ Yield the plain text captured by group(1), spanning the whole match. """

    def handleMatch(self, m, data):
        captured = m.group(1)
        return captured, m.start(0), m.end(0)
| 311 | |
| 312 | |
class EscapeInlineProcessor(InlineProcessor):
    """ Replace a backslash-escaped character with a placeholder. """

    def handleMatch(self, m, data):
        """Return `STX<code point>ETX` for characters listed in
        `md.ESCAPED_CHARS`, or `None` for any other character; the reported
        span is the whole match in both cases."""
        escaped = m.group(1)
        begin, finish = m.start(0), m.end(0)
        if escaped not in self.md.ESCAPED_CHARS:
            return None, begin, finish
        placeholder = '{}{}{}'.format(util.STX, ord(escaped), util.ETX)
        return placeholder, begin, finish
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 322 | |
| 323 | |
class SimpleTagPattern(Pattern):  # pragma: no cover
    """
    Legacy pattern producing a `tag` element whose text is group(3)
    of the match.

    """
    def __init__(self, pattern, tag):
        self.tag = tag
        Pattern.__init__(self, pattern)

    def handleMatch(self, m):
        element = etree.Element(self.tag)
        element.text = m.group(3)
        return element
| 338 | |
| 339 | |
class SimpleTagInlineProcessor(InlineProcessor):
    """
    Produce a `tag` element whose text is group(2) of the match.

    """
    def __init__(self, pattern, tag):
        self.tag = tag
        InlineProcessor.__init__(self, pattern)

    def handleMatch(self, m, data):  # pragma: no cover
        element = etree.Element(self.tag)
        element.text = m.group(2)
        return element, m.start(0), m.end(0)
| 354 | |
| 355 | |
class SubstituteTagPattern(SimpleTagPattern):  # pragma: no cover
    """ Legacy pattern replacing the match with an empty `tag` element. """

    def handleMatch(self, m):
        # The matched text is discarded; only a childless element is returned.
        empty = etree.Element(self.tag)
        return empty
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 360 | |
| 361 | |
class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
    """ Replace the whole match with an empty `tag` element. """

    def handleMatch(self, m, data):
        # The matched text is discarded; only a childless element is returned.
        empty = etree.Element(self.tag)
        return empty, m.start(0), m.end(0)
| 366 | |
| 367 | |
class BacktickInlineProcessor(InlineProcessor):
    """ Turn backtick-delimited text into a `<code>` element. """

    def __init__(self, pattern):
        InlineProcessor.__init__(self, pattern)
        self.tag = 'code'
        # Placeholder that stands in for one literal backslash.
        self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, ord('\\'), util.ETX)

    def handleMatch(self, m, data):
        """Return a code element for a code span, or escaped backslashes.

        When group(3) (the code span's content) is non-empty, a `tag` element
        holding the stripped, escaped content is returned.  Otherwise the
        match was a run of backslashes before backticks (group(1)), and each
        backslash pair is replaced with the escaped-backslash placeholder.
        """
        code = m.group(3)
        if not code:
            literal = m.group(1).replace('\\\\', self.ESCAPED_BSLASH)
            return literal, m.start(0), m.end(0)

        node = etree.Element(self.tag)
        node.text = util.AtomicString(util.code_escape(code.strip()))
        return node, m.start(0), m.end(0)
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 382 | |
| 383 | |
class DoubleTagPattern(SimpleTagPattern):  # pragma: no cover
    """Legacy pattern returning a tag2 element nested inside a tag1 element.

    `self.tag` holds both names separated by a comma, e.g. for strong
    emphasis.

    """
    def handleMatch(self, m):
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(3)
        # Five groups means the expression captured trailing text (group 4)
        # in addition to the two groups added by the Pattern wrapper.
        if len(m.groups()) == 5:
            inner.tail = m.group(4)
        return outer
| 398 | |
| 399 | |
class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
    """Return a tag2 element nested inside a tag1 element.

    `self.tag` holds both names separated by a comma, e.g. for strong
    emphasis.

    """
    def handleMatch(self, m, data):  # pragma: no cover
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(2)
        # A third group, when the expression has one, is the text that
        # follows the inner element.
        if len(m.groups()) == 3:
            inner.tail = m.group(3)
        return outer, m.start(0), m.end(0)
| 414 | |
| 415 | |
class HtmlInlineProcessor(InlineProcessor):
    """ Stash raw inline html and hand back its placeholder. """

    def handleMatch(self, m, data):
        raw = self.unescape(m.group(1))
        return self.md.htmlStash.store(raw), m.start(0), m.end(0)

    def unescape(self, text):
        """ Return `text` with inline placeholders expanded to serialized markup.

        `text` is returned unchanged when no 'inline' tree processor is
        registered.
        """
        try:
            nodes = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def restore(found):
            node = nodes.get(found.group(1))
            if node is None:
                return None
            try:
                return self.md.serializer(node)
            except Exception:
                # Fallback when the stashed value cannot be serialized.
                return r'\%s' % node

        return util.INLINE_PLACEHOLDER_RE.sub(restore, text)
| 440 | |
| 441 | |
class AsteriskProcessor(InlineProcessor):
    """Emphasis processor for handling strong and em matches inside asterisks.

    The expression given to the constructor only detects a possible start
    token; the actual matching is done with the entries of `PATTERNS`, which
    are tried in list order.
    """

    # Tried in order; the position of an entry in this list is also the `idx`
    # that limits which patterns may be applied to nested content.
    PATTERNS = [
        EmStrongItem(re.compile(EM_STRONG_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
        EmStrongItem(re.compile(STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
        EmStrongItem(re.compile(STRONG_EM3_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
        EmStrongItem(re.compile(STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'),
        EmStrongItem(re.compile(EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em')
    ]

    def build_single(self, m, tag, idx):
        """Return single tag.

        Creates a `tag` element and fills it from group(2) of `m`, parsing
        that text for nested patterns that come after `idx`.
        """
        el1 = etree.Element(tag)
        text = m.group(2)
        self.parse_sub_patterns(text, el1, None, idx)
        return el1

    def build_double(self, m, tags, idx):
        """Return double tag.

        `tags` is "outer,inner".  Group(2) of `m` becomes the content of the
        inner element; group(3), when the pattern has three groups, is parsed
        into the outer element after the inner one.
        """

        tag1, tag2 = tags.split(",")
        el1 = etree.Element(tag1)
        el2 = etree.Element(tag2)
        text = m.group(2)
        self.parse_sub_patterns(text, el2, None, idx)
        el1.append(el2)
        if len(m.groups()) == 3:
            text = m.group(3)
            # `el2` is passed as `last` so leading text lands in its tail.
            self.parse_sub_patterns(text, el1, el2, idx)
        return el1

    def build_double2(self, m, tags, idx):
        """Return double tags (variant 2): `<strong>text <em>text</em></strong>`.

        Group(2) of `m` is parsed into the outer element first, then the
        inner element is appended and filled from group(3).
        """

        tag1, tag2 = tags.split(",")
        el1 = etree.Element(tag1)
        el2 = etree.Element(tag2)
        text = m.group(2)
        self.parse_sub_patterns(text, el1, None, idx)
        text = m.group(3)
        el1.append(el2)
        self.parse_sub_patterns(text, el2, None, idx)
        return el1

    def parse_sub_patterns(self, data, parent, last, idx):
        """
        Parses sub patterns.

        Scans `data` for nested emphasis/strong constructs, appending the
        resulting elements to `parent` and storing the text between them in
        `parent.text` or in the tail of the previously appended child.

        `data` (`str`):
            text to evaluate.

        `parent` (`etree.Element`):
            Parent to attach text and sub elements to.

        `last` (`etree.Element`):
            Last appended child to parent. Can also be None if parent has no children.

        `idx` (`int`):
            Current pattern index that was used to evaluate the parent.

        """

        # `offset` marks where the not-yet-emitted plain text starts;
        # `pos` is the current scan position.
        offset = 0
        pos = 0

        length = len(data)
        while pos < length:
            # Find the start of potential emphasis or strong tokens
            if self.compiled_re.match(data, pos):
                matched = False
                # See if we can match an emphasis/strong pattern
                # Note: there is no `break` after a match, so the remaining
                # (later) patterns are still tried at the advanced position
                # during the same pass.
                for index, item in enumerate(self.PATTERNS):
                    # Only evaluate patterns that are after what was used on the parent
                    if index <= idx:
                        continue
                    m = item.pattern.match(data, pos)
                    if m:
                        # Append child nodes to parent
                        # Text nodes should be appended to the last
                        # child if present, and if not, it should
                        # be added as the parent's text node.
                        text = data[offset:m.start(0)]
                        if text:
                            if last is not None:
                                last.tail = text
                            else:
                                parent.text = text
                        el = self.build_element(m, item.builder, item.tags, index)
                        parent.append(el)
                        last = el
                        # Move our position past the matched hunk
                        offset = pos = m.end(0)
                        matched = True
                if not matched:
                    # We matched nothing, move on to the next character
                    pos += 1
            else:
                # Increment position as no potential emphasis start was found.
                pos += 1

        # Append any leftover text as a text node.
        text = data[offset:]
        if text:
            if last is not None:
                last.tail = text
            else:
                parent.text = text

    def build_element(self, m, builder, tags, index):
        """Element builder.

        Dispatches on `builder`: 'double2' and 'double' select the matching
        double builders, any other value falls back to `build_single`.
        """

        if builder == 'double2':
            return self.build_double2(m, tags, index)
        elif builder == 'double':
            return self.build_double(m, tags, index)
        else:
            return self.build_single(m, tags, index)

    def handleMatch(self, m, data):
        """Parse patterns.

        Tries each entry of `PATTERNS` at the start of `m` and builds an
        element from the first one that matches.

        Returns `(el, start, end)`, or `(None, None, None)` when no pattern
        matches at that position.
        """

        el = None
        start = None
        end = None

        for index, item in enumerate(self.PATTERNS):
            m1 = item.pattern.match(data, m.start(0))
            if m1:
                start = m1.start(0)
                end = m1.end(0)
                el = self.build_element(m1, item.builder, item.tags, index)
                break
        return el, start, end
| 576 | |
| 577 | |
class UnderscoreProcessor(AsteriskProcessor):
    """Emphasis processor for strong and em matches delimited by underscores.

    Only the pattern table differs from `AsteriskProcessor`; the entries are
    tried in the order listed.
    """

    PATTERNS = [
        EmStrongItem(re.compile(regex, re.DOTALL | re.UNICODE), builder, tags)
        for regex, builder, tags in (
            (EM_STRONG2_RE, 'double', 'strong,em'),
            (STRONG_EM2_RE, 'double', 'em,strong'),
            (SMART_STRONG_EM_RE, 'double2', 'strong,em'),
            (SMART_STRONG_RE, 'single', 'strong'),
            (SMART_EMPHASIS_RE, 'single', 'em'),
        )
    ]
| 588 | |
| 589 | |
| 590 | class LinkInlineProcessor(InlineProcessor): |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 591 | """ Return a link element from the given match. """ |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 592 | RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE) |
| 593 | RE_TITLE_CLEAN = re.compile(r'\s') |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 594 | |
def handleMatch(self, m, data):
    """Build an `<a>` element for the link that starts at match `m`.

    The link text is read with `getText` starting right after the matched
    opening bracket, then the destination and optional title are read with
    `getLink`.

    Returns `(el, start, end)`, or `(None, None, None)` when either the text
    or the link part could not be parsed.
    """
    text, index, handled = self.getText(data, m.end(0))
    if handled:
        href, title, index, handled = self.getLink(data, index)
    if not handled:
        return None, None, None

    link = etree.Element("a")
    link.text = text
    link.set("href", href)
    if title is not None:
        link.set("title", title)

    return link, m.start(0), index
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 614 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 615 | def getLink(self, data, index): |
| 616 | """Parse data between `()` of `[Text]()` allowing recursive `()`. """ |
| 617 | |
| 618 | href = '' |
| 619 | title = None |
| 620 | handled = False |
| 621 | |
| 622 | m = self.RE_LINK.match(data, pos=index) |
| 623 | if m and m.group(1): |
| 624 | # Matches [Text](<link> "title") |
| 625 | href = m.group(1)[1:-1].strip() |
| 626 | if m.group(2): |
| 627 | title = m.group(2)[1:-1] |
| 628 | index = m.end(0) |
| 629 | handled = True |
| 630 | elif m: |
| 631 | # Track bracket nesting and index in string |
| 632 | bracket_count = 1 |
| 633 | backtrack_count = 1 |
| 634 | start_index = m.end() |
| 635 | index = start_index |
| 636 | last_bracket = -1 |
| 637 | |
| 638 | # Primary (first found) quote tracking. |
| 639 | quote = None |
| 640 | start_quote = -1 |
| 641 | exit_quote = -1 |
| 642 | ignore_matches = False |
| 643 | |
| 644 | # Secondary (second found) quote tracking. |
| 645 | alt_quote = None |
| 646 | start_alt_quote = -1 |
| 647 | exit_alt_quote = -1 |
| 648 | |
| 649 | # Track last character |
| 650 | last = '' |
| 651 | |
| 652 | for pos in range(index, len(data)): |
| 653 | c = data[pos] |
| 654 | if c == '(': |
| 655 | # Count nested ( |
| 656 | # Don't increment the bracket count if we are sure we're in a title. |
| 657 | if not ignore_matches: |
| 658 | bracket_count += 1 |
| 659 | elif backtrack_count > 0: |
| 660 | backtrack_count -= 1 |
| 661 | elif c == ')': |
| 662 | # Match nested ) to ( |
| 663 | # Don't decrement if we are sure we are in a title that is unclosed. |
| 664 | if ((exit_quote != -1 and quote == last) or (exit_alt_quote != -1 and alt_quote == last)): |
| 665 | bracket_count = 0 |
| 666 | elif not ignore_matches: |
| 667 | bracket_count -= 1 |
| 668 | elif backtrack_count > 0: |
| 669 | backtrack_count -= 1 |
揚帆起航 | 0f44735 | 2022-11-28 22:32:11 +0000 | [diff] [blame^] | 670 | # We've found our backup end location if the title doesn't resolve. |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame] | 671 | if backtrack_count == 0: |
| 672 | last_bracket = index + 1 |
| 673 | |
| 674 | elif c in ("'", '"'): |
| 675 | # Quote has started |
| 676 | if not quote: |
| 677 | # We'll assume we are now in a title. |
| 678 | # Brackets are quoted, so no need to match them (except for the final one). |
| 679 | ignore_matches = True |
| 680 | backtrack_count = bracket_count |
| 681 | bracket_count = 1 |
| 682 | start_quote = index + 1 |
| 683 | quote = c |
| 684 | # Secondary quote (in case the first doesn't resolve): [text](link'"title") |
| 685 | elif c != quote and not alt_quote: |
| 686 | start_alt_quote = index + 1 |
| 687 | alt_quote = c |
| 688 | # Update primary quote match |
| 689 | elif c == quote: |
| 690 | exit_quote = index + 1 |
| 691 | # Update secondary quote match |
| 692 | elif alt_quote and c == alt_quote: |
| 693 | exit_alt_quote = index + 1 |
| 694 | |
| 695 | index += 1 |
| 696 | |
| 697 | # Link is closed, so let's break out of the loop |
| 698 | if bracket_count == 0: |
| 699 | # Get the title if we closed a title string right before link closed |
| 700 | if exit_quote >= 0 and quote == last: |
| 701 | href = data[start_index:start_quote - 1] |
| 702 | title = ''.join(data[start_quote:exit_quote - 1]) |
| 703 | elif exit_alt_quote >= 0 and alt_quote == last: |
| 704 | href = data[start_index:start_alt_quote - 1] |
| 705 | title = ''.join(data[start_alt_quote:exit_alt_quote - 1]) |
| 706 | else: |
| 707 | href = data[start_index:index - 1] |
| 708 | break |
| 709 | |
| 710 | if c != ' ': |
| 711 | last = c |
| 712 | |
| 713 | # We have a scenario: [test](link"notitle) |
| 714 | # When we enter a string, we stop tracking bracket resolution in the main counter, |
| 715 | # but we do keep a backup counter up until we discover where we might resolve all brackets |
| 716 | # if the title string fails to resolve. |
| 717 | if bracket_count != 0 and backtrack_count == 0: |
| 718 | href = data[start_index:last_bracket - 1] |
| 719 | index = last_bracket |
| 720 | bracket_count = 0 |
| 721 | |
| 722 | handled = bracket_count == 0 |
| 723 | |
| 724 | if title is not None: |
| 725 | title = self.RE_TITLE_CLEAN.sub(' ', dequote(self.unescape(title.strip()))) |
| 726 | |
| 727 | href = self.unescape(href).strip() |
| 728 | |
| 729 | return href, title, index, handled |
| 730 | |
def getText(self, data, index):
    """Collect the text up to the `]` that closes an already-opened `[`.

    Scanning starts at `index` with one bracket considered open; every
    nested `[` must be matched by its own `]` before the closing one counts.

    Returns a tuple `(text, index, handled)`: the characters gathered before
    the closing bracket, the position just past the last character consumed,
    and whether a balancing `]` was actually found.
    """
    depth = 1
    end = index
    chars = []
    for offset in range(index, len(data)):
        ch = data[offset]
        end += 1
        if ch == '[':
            depth += 1
        elif ch == ']':
            depth -= 1
            if depth == 0:
                # The closing bracket itself is consumed but not collected.
                break
        chars.append(ch)
    return ''.join(chars), end, depth == 0
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 749 | |
| 750 | |
class ImageInlineProcessor(LinkInlineProcessor):
    """Build an `img` element from an inline image match."""

    def handleMatch(self, m, data):
        """Parse the bracketed text and the link that follow the match `m`.

        Returns `(None, None, None)` when either the text (`getText`) or the
        link (`getLink`) cannot be parsed. Otherwise returns the new `img`
        element, the start of the match and the index where parsing stopped.
        """
        unhandled = (None, None, None)

        alt_source, cursor, ok = self.getText(data, m.end(0))
        if not ok:
            return unhandled

        src, title, cursor, ok = self.getLink(data, cursor)
        if not ok:
            return unhandled

        image = etree.Element("img")
        image.set("src", src)
        # An empty title is still emitted; only a missing one (None) is skipped.
        if title is not None:
            image.set("title", title)
        image.set('alt', self.unescape(alt_source))

        return image, m.start(0), cursor
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 772 | |
| 773 | |
class ReferenceInlineProcessor(LinkInlineProcessor):
    """Resolve a reference against the stored references and build a link element."""

    # Collapses any run of whitespace in a reference id to one space.
    NEWLINE_CLEANUP_RE = re.compile(r'\s+', re.MULTILINE)

    # Matches the `[id]` part, optionally preceded by one whitespace character.
    RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)

    def handleMatch(self, m, data):
        """Look up the reference following the match `m` and build its tag.

        Returns `(None, None, None)` when the text or the id cannot be
        parsed, `(None, m.start(0), end)` when the id is not present in
        `self.md.references`, and otherwise the element produced by
        `makeTag` together with the start of the match and `end`.
        """
        text, index, handled = self.getText(data, m.end(0))
        if not handled:
            return None, None, None

        ref_id, end, handled = self.evalId(data, index, text)
        if not handled:
            return None, None, None

        # Whitespace runs (including line breaks) in the id become one space.
        ref_id = self.NEWLINE_CLEANUP_RE.sub(' ', ref_id)

        if ref_id in self.md.references:
            href, title = self.md.references[ref_id]
            return self.makeTag(href, title, text), m.start(0), end

        # Undefined references are ignored.
        return None, m.start(0), end

    def evalId(self, data, index, text):
        """Evaluate the id portion of `[ref][id]`.

        An empty id (`[ref][]`) falls back to the lowercased `text`.
        Returns `(id, end, True)` on success, or `(None, index, False)`
        when `RE_LINK` does not match at `index`.
        """
        found = self.RE_LINK.match(data, pos=index)
        if found is None:
            return None, index, False

        ref_id = found.group(1).lower() or text.lower()
        return ref_id, found.end(0), True

    def makeTag(self, href, title, text):
        """Return an `a` element with `href`, an optional `title`, and `text`."""
        anchor = etree.Element('a')
        anchor.set('href', href)
        # A falsy title (None or empty string) produces no title attribute.
        if title:
            anchor.set('title', title)
        anchor.text = text
        return anchor
| 823 | |
| 824 | |
class ShortReferenceInlineProcessor(ReferenceInlineProcessor):
    """Short form of reference: `[google]`."""

    def evalId(self, data, index, text):
        """Use the lowercased `text` as the id; `index` is returned unchanged."""
        ref_id = text.lower()
        return ref_id, index, True
| 831 | |
| 832 | |
class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
    """Resolve a stored reference and build an `img` element for it."""

    def makeTag(self, href, title, text):
        """Return an `img` element whose `src` is `href` and `alt` is the unescaped `text`."""
        image = etree.Element("img")
        image.set("src", href)
        # A falsy title (None or empty string) produces no title attribute.
        if title:
            image.set("title", title)
        alt_text = self.unescape(text)
        image.set("alt", alt_text)
        return image
| 842 | |
| 843 | |
class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor):
    """Short form of image reference: `![ref]`."""

    def evalId(self, data, index, text):
        """Use the lowercased `text` as the id; `index` is returned unchanged."""
        ref_id = text.lower()
        return ref_id, index, True
| 850 | |
| 851 | |
class AutolinkInlineProcessor(InlineProcessor):
    """Return a link Element given an autolink (`<http://example/com>`)."""

    def handleMatch(self, m, data):
        """Build an `a` element from group 1 of the match.

        The unescaped group becomes the `href`; the raw group, wrapped in
        `util.AtomicString`, becomes the link text. Returns the element
        with the start and end of the whole match.
        """
        target = m.group(1)

        anchor = etree.Element("a")
        anchor.set('href', self.unescape(target))
        anchor.text = util.AtomicString(target)

        return anchor, m.start(0), m.end(0)
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 859 | |
| 860 | |
class AutomailInlineProcessor(InlineProcessor):
    """
    Return a mailto link Element given an automail link (`<foo@example.com>`).
    """

    def handleMatch(self, m, data):
        """Build an `a` element whose text and `href` are entity-encoded.

        Group 1 of the match is unescaped and any leading `mailto:` is
        dropped. Every character of the visible text is written as a named
        entity when `entities.codepoint2name` has one, else as a numeric
        one. The `href` (`mailto:` plus the address) is written entirely as
        numeric entities. `util.AMP_SUBSTITUTE` stands where the ampersand
        would be (presumably swapped back to `&` later; confirm in `util`).
        """
        address = self.unescape(m.group(1))
        if address.startswith("mailto:"):
            address = address[len("mailto:"):]

        def encode_char(char):
            """Return the entity form of `char`: named if known, else numeric."""
            code = ord(char)
            name = entities.codepoint2name.get(code)
            if not name:
                return "%s#%d;" % (util.AMP_SUBSTITUTE, code)
            return "{}{};".format(util.AMP_SUBSTITUTE, name)

        anchor = etree.Element('a')
        encoded_text = ''.join(encode_char(char) for char in address)
        anchor.text = util.AtomicString(encoded_text)

        encoded_href = "".join(
            util.AMP_SUBSTITUTE + '#%d;' % ord(char)
            for char in "mailto:" + address
        )
        anchor.set('href', encoded_href)

        return anchor, m.start(0), m.end(0)