Re-land "Check in a simple pure-python based Markdown previewer."
This re-lands #352450 with a fix to make checklicenses.py happy.
R=thestig@chromium.org
TBR=jam@chromium.org
Review URL: https://codereview.chromium.org/1392733002
Cr-Original-Commit-Position: refs/heads/master@{#352731}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: 27c171cd168807f85b95ae8aaa797bda02eff319
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
new file mode 100644
index 0000000..2d4dcb5
--- /dev/null
+++ b/markdown/postprocessors.py
@@ -0,0 +1,108 @@
+"""
+POST-PROCESSORS
+=============================================================================
+
+Markdown also allows post-processors, which are similar to preprocessors in
+that they need to implement a "run" method. However, they are run after core
+processing.
+
+"""
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import util
+from . import odict
+import re
+
+
+def build_postprocessors(md_instance, **kwargs):
+ """ Build the default postprocessors for Markdown. """
+ postprocessors = odict.OrderedDict()
+ postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance)
+ postprocessors["amp_substitute"] = AndSubstitutePostprocessor()
+ postprocessors["unescape"] = UnescapePostprocessor()
+ return postprocessors
+
+
+class Postprocessor(util.Processor):
+ """
+ Postprocessors are run after the ElementTree it converted back into text.
+
+ Each Postprocessor implements a "run" method that takes a pointer to a
+ text string, modifies it as necessary and returns a text string.
+
+ Postprocessors must extend markdown.Postprocessor.
+
+ """
+
+ def run(self, text):
+ """
+ Subclasses of Postprocessor should implement a `run` method, which
+ takes the html document as a single text string and returns a
+ (possibly modified) string.
+
+ """
+ pass # pragma: no cover
+
+
+class RawHtmlPostprocessor(Postprocessor):
+ """ Restore raw html to the document. """
+
+ def run(self, text):
+ """ Iterate over html stash and restore "safe" html. """
+ for i in range(self.markdown.htmlStash.html_counter):
+ html, safe = self.markdown.htmlStash.rawHtmlBlocks[i]
+ if self.markdown.safeMode and not safe:
+ if str(self.markdown.safeMode).lower() == 'escape':
+ html = self.escape(html)
+ elif str(self.markdown.safeMode).lower() == 'remove':
+ html = ''
+ else:
+ html = self.markdown.html_replacement_text
+ if (self.isblocklevel(html) and
+ (safe or not self.markdown.safeMode)):
+ text = text.replace(
+ "<p>%s</p>" %
+ (self.markdown.htmlStash.get_placeholder(i)),
+ html + "\n"
+ )
+ text = text.replace(
+ self.markdown.htmlStash.get_placeholder(i), html
+ )
+ return text
+
+ def escape(self, html):
+ """ Basic html escaping """
+ html = html.replace('&', '&')
+ html = html.replace('<', '<')
+ html = html.replace('>', '>')
+ return html.replace('"', '"')
+
+ def isblocklevel(self, html):
+ m = re.match(r'^\<\/?([^ >]+)', html)
+ if m:
+ if m.group(1)[0] in ('!', '?', '@', '%'):
+ # Comment, php etc...
+ return True
+ return util.isBlockLevel(m.group(1))
+ return False
+
+
+class AndSubstitutePostprocessor(Postprocessor):
+ """ Restore valid entities """
+
+ def run(self, text):
+ text = text.replace(util.AMP_SUBSTITUTE, "&")
+ return text
+
+
+class UnescapePostprocessor(Postprocessor):
+ """ Restore escaped chars """
+
+ RE = re.compile('%s(\d+)%s' % (util.STX, util.ETX))
+
+ def unescape(self, m):
+ return util.int2str(int(m.group(1)))
+
+ def run(self, text):
+ return self.RE.sub(self.unescape, text)