Re-land "Check in a simple pure-python based Markdown previewer."

This re-lands #352450 with a fix to make checklicenses.py happy.

R=thestig@chromium.org
TBR=jam@chromium.org

Review URL: https://codereview.chromium.org/1392733002

Cr-Original-Commit-Position: refs/heads/master@{#352731}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: 27c171cd168807f85b95ae8aaa797bda02eff319
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
new file mode 100644
index 0000000..2d4dcb5
--- /dev/null
+++ b/markdown/postprocessors.py
@@ -0,0 +1,108 @@
+"""
+POST-PROCESSORS
+=============================================================================
+
+Markdown also allows post-processors, which are similar to preprocessors in
+that they need to implement a "run" method. However, they are run after core
+processing.
+
+"""
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import util
+from . import odict
+import re
+
+
+def build_postprocessors(md_instance, **kwargs):
+    """ Build the default postprocessors for Markdown. """
+    postprocessors = odict.OrderedDict()
+    postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance)
+    postprocessors["amp_substitute"] = AndSubstitutePostprocessor()
+    postprocessors["unescape"] = UnescapePostprocessor()
+    return postprocessors
+
+
+class Postprocessor(util.Processor):
+    """
+    Postprocessors are run after the ElementTree it converted back into text.
+
+    Each Postprocessor implements a "run" method that takes a pointer to a
+    text string, modifies it as necessary and returns a text string.
+
+    Postprocessors must extend markdown.Postprocessor.
+
+    """
+
+    def run(self, text):
+        """
+        Subclasses of Postprocessor should implement a `run` method, which
+        takes the html document as a single text string and returns a
+        (possibly modified) string.
+
+        """
+        pass  # pragma: no cover
+
+
+class RawHtmlPostprocessor(Postprocessor):
+    """ Restore raw html to the document. """
+
+    def run(self, text):
+        """ Iterate over html stash and restore "safe" html. """
+        for i in range(self.markdown.htmlStash.html_counter):
+            html, safe = self.markdown.htmlStash.rawHtmlBlocks[i]
+            if self.markdown.safeMode and not safe:
+                if str(self.markdown.safeMode).lower() == 'escape':
+                    html = self.escape(html)
+                elif str(self.markdown.safeMode).lower() == 'remove':
+                    html = ''
+                else:
+                    html = self.markdown.html_replacement_text
+            if (self.isblocklevel(html) and
+               (safe or not self.markdown.safeMode)):
+                text = text.replace(
+                    "<p>%s</p>" %
+                    (self.markdown.htmlStash.get_placeholder(i)),
+                    html + "\n"
+                )
+            text = text.replace(
+                self.markdown.htmlStash.get_placeholder(i), html
+            )
+        return text
+
+    def escape(self, html):
+        """ Basic html escaping """
+        html = html.replace('&', '&amp;')
+        html = html.replace('<', '&lt;')
+        html = html.replace('>', '&gt;')
+        return html.replace('"', '&quot;')
+
+    def isblocklevel(self, html):
+        m = re.match(r'^\<\/?([^ >]+)', html)
+        if m:
+            if m.group(1)[0] in ('!', '?', '@', '%'):
+                # Comment, php etc...
+                return True
+            return util.isBlockLevel(m.group(1))
+        return False
+
+
+class AndSubstitutePostprocessor(Postprocessor):
+    """ Restore valid entities """
+
+    def run(self, text):
+        text = text.replace(util.AMP_SUBSTITUTE, "&")
+        return text
+
+
+class UnescapePostprocessor(Postprocessor):
+    """ Restore escaped chars """
+
+    RE = re.compile('%s(\d+)%s' % (util.STX, util.ETX))
+
+    def unescape(self, m):
+        return util.int2str(int(m.group(1)))
+
+    def run(self, text):
+        return self.RE.sub(self.unescape, text)