"""
POST-PROCESSORS
=============================================================================

Markdown also allows post-processors, which are similar to preprocessors in
that they need to implement a "run" method. However, they are run after core
processing.

"""
| 10 | |
| 11 | from __future__ import absolute_import |
| 12 | from __future__ import unicode_literals |
| 13 | from . import util |
| 14 | from . import odict |
| 15 | import re |
| 16 | |
| 17 | |
def build_postprocessors(md_instance, **kwargs):
    """ Assemble and return the default postprocessors for Markdown.

    `md_instance` is handed to the raw-html postprocessor; the other two
    postprocessors are created without arguments. Extra keyword arguments
    are accepted but not used here.
    """
    registry = odict.OrderedDict()
    # Entries are registered in this order: raw_html, amp_substitute,
    # unescape.
    defaults = (
        ("raw_html", RawHtmlPostprocessor(md_instance)),
        ("amp_substitute", AndSubstitutePostprocessor()),
        ("unescape", UnescapePostprocessor()),
    )
    for name, processor in defaults:
        registry[name] = processor
    return registry
| 25 | |
| 26 | |
class Postprocessor(util.Processor):
    """
    Base class for postprocessors.

    Postprocessors are run after the ElementTree is converted back into
    text. Each one implements a "run" method that receives the document as
    a text string, modifies it as necessary and returns a text string.

    Postprocessors must extend markdown.Postprocessor.

    """

    def run(self, text):
        """
        Hook for subclasses: take the html document as a single text string
        and return a (possibly modified) string.

        This base implementation does nothing and returns None.

        """
| 46 | |
| 47 | |
class RawHtmlPostprocessor(Postprocessor):
    """ Restore raw html to the document. """

    def run(self, text):
        """
        Iterate over html stash and restore "safe" html.

        Each stashed entry is an ``(html, safe)`` pair. When safe mode is on
        and the entry is not marked safe, the html is escaped ('escape'),
        dropped ('remove') or swapped for the configured replacement text
        (any other truthy safe-mode value). The entry's placeholder in
        `text` is then replaced by the resulting html, and the updated text
        is returned.
        """
        stash = self.markdown.htmlStash
        safe_mode = self.markdown.safeMode
        for i in range(stash.html_counter):
            html, safe = stash.rawHtmlBlocks[i]
            placeholder = stash.get_placeholder(i)
            if safe_mode and not safe:
                mode = str(safe_mode).lower()
                if mode == 'escape':
                    html = self.escape(html)
                elif mode == 'remove':
                    html = ''
                else:
                    html = self.markdown.html_replacement_text
            # Trusted block-level html must not stay wrapped in the <p>
            # that was put around its placeholder, so swap out the whole
            # paragraph first.
            if self.isblocklevel(html) and (safe or not safe_mode):
                text = text.replace("<p>%s</p>" % placeholder, html + "\n")
            # Any remaining (inline or unwrapped) occurrence.
            text = text.replace(placeholder, html)
        return text

    def escape(self, html):
        """ Basic html escaping of ``&``, ``<``, ``>`` and ``"``. """
        # The ampersand goes first so the entities produced by the later
        # replacements are not escaped a second time.
        html = html.replace('&', '&amp;')
        html = html.replace('<', '&lt;')
        html = html.replace('>', '&gt;')
        return html.replace('"', '&quot;')

    def isblocklevel(self, html):
        """
        Return whether `html` starts with a tag treated as block level.

        Tags whose name starts with ``!``, ``?``, ``@`` or ``%`` (comments,
        php etc.) always count as block level; other tag names are checked
        with ``util.isBlockLevel``. Text that does not start with a tag
        yields False.
        """
        m = re.match(r'^\<\/?([^ >]+)', html)
        if m:
            tag = m.group(1)
            if tag[0] in ('!', '?', '@', '%'):
                # Comment, php etc...
                return True
            return util.isBlockLevel(tag)
        return False
| 89 | |
| 90 | |
class AndSubstitutePostprocessor(Postprocessor):
    """ Restore valid entities """

    def run(self, text):
        """ Return `text` with each util.AMP_SUBSTITUTE marker turned back
        into a literal ampersand. """
        return text.replace(util.AMP_SUBSTITUTE, "&")
| 97 | |
| 98 | |
class UnescapePostprocessor(Postprocessor):
    """ Restore escaped chars """

    # Matches a run of digits enclosed between util.STX and util.ETX.
    # Written as a raw string so that `\d` reaches the regex engine
    # untouched instead of being an invalid string escape (which newer
    # Python versions warn about).
    RE = re.compile(r'%s(\d+)%s' % (util.STX, util.ETX))

    def unescape(self, m):
        """ Return the replacement for one match: the captured digits,
        converted to an int and passed through util.int2str. """
        # NOTE(review): int2str presumably maps a code point to its
        # character; it is defined in util and not visible here.
        return util.int2str(int(m.group(1)))

    def run(self, text):
        """ Return `text` with every match of RE replaced via unescape(). """
        return self.RE.sub(self.unescape, text)