dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame^] | 1 | """ |
| 2 | CodeHilite Extension for Python-Markdown |
| 3 | ======================================== |
| 4 | |
| 5 | Adds code/syntax highlighting to standard Python-Markdown code blocks. |
| 6 | |
| 7 | See <https://pythonhosted.org/Markdown/extensions/code_hilite.html> |
| 8 | for documentation. |
| 9 | |
| 10 | Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/). |
| 11 | |
| 12 | All changes Copyright 2008-2014 The Python Markdown Project |
| 13 | |
| 14 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) |
| 15 | |
| 16 | """ |
| 17 | |
| 18 | from __future__ import absolute_import |
| 19 | from __future__ import unicode_literals |
| 20 | from . import Extension |
| 21 | from ..treeprocessors import Treeprocessor |
| 22 | |
| 23 | try: |
| 24 | from pygments import highlight |
| 25 | from pygments.lexers import get_lexer_by_name, guess_lexer |
| 26 | from pygments.formatters import get_formatter_by_name |
| 27 | pygments = True |
| 28 | except ImportError: |
| 29 | pygments = False |
| 30 | |
| 31 | |
def parse_hl_lines(expr):
    """Turn a highlight-lines expression into a list of line numbers.

    expr is a whitespace-separated string of integers, e.g. '1 2' to
    emphasize lines 1 and 2 of a code block.

    Returns a list of ints. An empty or missing expression, or one holding
    any token that is not an integer, yields an empty list.
    """
    tokens = expr.split() if expr else []
    try:
        numbers = [int(token) for token in tokens]
    except ValueError:
        # One bad token discards the whole expression.
        numbers = []
    return numbers
| 45 | |
| 46 | |
| 47 | # ------------------ The Main CodeHilite Class ---------------------- |
class CodeHilite(object):
    """
    Determine language of source code, and pass it into pygments hilighter.

    Basic Usage:
        >>> code = CodeHilite(src = 'some text')
        >>> html = code.hilite()

    * src: Source string (hilite() calls str methods such as strip/split
      on it).

    * linenums: (Boolean) Set line numbering to 'on' (True),
      'off' (False) or 'auto'(None). Set to 'auto' by default.

    * guess_lang: (Boolean) Turn language auto-detection
      'on' or 'off' (on by default).

    * css_class: Set class name of wrapper div ('codehilite' by default).

    * lang: Language name. When None, it is read from the first line of
      src (see _parseHeader).

    * style: Passed to the Pygments HTML formatter as `style`.

    * noclasses: Passed to the Pygments HTML formatter as `noclasses`.

    * tab_length: Stored on the instance; not otherwise used by this class.

    * hl_lines: (List of integers) Lines to emphasize, 1-indexed.

    * use_pygments: (Boolean) When False, Pygments is bypassed and plain
      escaped <pre><code> markup is produced instead.

    Low Level Usage:
        >>> code = CodeHilite()
        >>> code.src = 'some text' # A string.
        >>> code.linenums = True  # Turns line numbering on or off.
        >>> html = code.hilite()

    """

    def __init__(self, src=None, linenums=None, guess_lang=True,
                 css_class="codehilite", lang=None, style='default',
                 noclasses=False, tab_length=4, hl_lines=None,
                 use_pygments=True):
        self.src = src
        self.lang = lang
        self.linenums = linenums
        self.guess_lang = guess_lang
        self.css_class = css_class
        self.style = style
        self.noclasses = noclasses
        self.tab_length = tab_length
        # Never share a caller's (or a default) list between instances.
        self.hl_lines = hl_lines or []
        self.use_pygments = use_pygments

    def hilite(self):
        """
        Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with
        optional line numbers. The output should then be styled with css to
        your liking. No styles are applied by default - only styling hooks
        (i.e.: <span class="k">).

        When Pygments is unavailable or use_pygments is False, the source is
        HTML-escaped and wrapped in <pre><code> with `language-*` /
        `linenums` classes for JavaScript highlighters.

        returns : A string of html.

        """

        self.src = self.src.strip('\n')

        if self.lang is None:
            self._parseHeader()

        if self.use_pygments and pygments:
            try:
                lexer = get_lexer_by_name(self.lang)
            except ValueError:
                # Unknown language: guess if allowed, else plain text.
                try:
                    if self.guess_lang:
                        lexer = guess_lexer(self.src)
                    else:
                        lexer = get_lexer_by_name('text')
                except ValueError:
                    lexer = get_lexer_by_name('text')
            formatter = get_formatter_by_name('html',
                                              linenos=self.linenums,
                                              cssclass=self.css_class,
                                              style=self.style,
                                              noclasses=self.noclasses,
                                              hl_lines=self.hl_lines)
            return highlight(self.src, lexer, formatter)

        # just escape and build markup usable by JS highlighting libs.
        # '&' must be replaced first so the entities added afterwards are
        # not escaped a second time.
        txt = self.src.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')
        classes = []
        if self.lang:
            classes.append('language-%s' % self.lang)
        if self.linenums:
            classes.append('linenums')
        class_str = ''
        if classes:
            class_str = ' class="%s"' % ' '.join(classes)
        return '<pre class="%s"><code%s>%s</code></pre>\n' % \
            (self.css_class, class_str, txt)

    def _parseHeader(self):
        """
        Determines language of a code block from shebang line and whether said
        line should be removed or left in place. If the shebang line contains
        a path (even a single /) then it is assumed to be a real shebang line
        and left alone. However, if no path is given (i.e.: #!python or
        :::python) then it is assumed to be a mock shebang for language
        identification of a code fragment and removed from the code block
        prior to processing for code highlighting. When a mock shebang
        (i.e.: #!python) is found, line numbering is turned on. When colons
        are found in place of a shebang (i.e.: :::python), line numbering is
        left in the current state - off by default.

        Also parses optional list of highlight lines, like:

            :::python hl_lines="1 3"

        Updates self.lang, self.linenums, self.hl_lines and self.src in
        place; returns nothing.
        """

        import re

        # Split off the first line; it is the only one examined.
        first_line, _, remainder = self.src.partition("\n")

        c = re.compile(r'''
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w+-]*)               # The language
            \s*                             # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            ''', re.VERBOSE)
        # search first line for shebang
        m = c.search(first_line)
        keep_first_line = True
        if m:
            # 'lang' is a non-optional group, so on a match it is always a
            # string (possibly empty).
            self.lang = m.group('lang').lower()
            # A path means a real shebang: leave the line in the source.
            keep_first_line = bool(m.group('path'))
            if self.linenums is None and m.group('shebang'):
                # Overridable and Shebang exists - use line numbers
                self.linenums = True

            self.hl_lines = parse_hl_lines(m.group('hl_lines'))

        body = self.src if keep_first_line else remainder
        self.src = body.strip("\n")
| 195 | |
| 196 | |
| 197 | # ------------------ The Markdown Extension ------------------------------- |
| 198 | |
| 199 | |
class HiliteTreeprocessor(Treeprocessor):
    """ Treeprocessor that highlights the source held in code blocks. """

    def run(self, root):
        """ Highlight each <pre><code> block and stash the resulting html. """
        for pre in root.iter('pre'):
            # Only a <pre> wrapping exactly one <code> child is a code block.
            if len(pre) != 1 or pre[0].tag != 'code':
                continue

            options = self.config
            highlighter = CodeHilite(
                pre[0].text,
                linenums=options['linenums'],
                guess_lang=options['guess_lang'],
                css_class=options['css_class'],
                style=options['pygments_style'],
                noclasses=options['noclasses'],
                tab_length=self.markdown.tab_length,
                use_pygments=options['use_pygments']
            )
            html = highlighter.hilite()
            placeholder = self.markdown.htmlStash.store(html, safe=True)

            # Empty the element, then reuse it as a <p> carrying only the
            # placeholder; the p is later removed when the raw html is
            # inserted.
            pre.clear()
            pre.tag = 'p'
            pre.text = placeholder
| 226 | |
| 227 | |
class CodeHiliteExtension(Extension):
    """ Markdown extension adding source code hilighting to codeblocks. """

    def __init__(self, *args, **kwargs):
        # Default configs; every entry is [default value, description].
        defaults = {}
        defaults['linenums'] = [
            None,
            "Use lines numbers. True=yes, False=no, None=auto"
        ]
        defaults['guess_lang'] = [
            True,
            "Automatic language detection - Default: True"
        ]
        defaults['css_class'] = [
            "codehilite",
            "Set class name for wrapper <div> - Default: codehilite"
        ]
        defaults['pygments_style'] = [
            'default',
            'Pygments HTML Formatter Style (Colorscheme) - Default: default'
        ]
        defaults['noclasses'] = [
            False,
            'Use inline styles instead of CSS classes - Default false'
        ]
        defaults['use_pygments'] = [
            True,
            'Use Pygments to Highlight code blocks. Disable if using a '
            'JavaScript library. Default: True'
        ]
        self.config = defaults

        super(CodeHiliteExtension, self).__init__(*args, **kwargs)

    def extendMarkdown(self, md, md_globals):
        """ Add a HiliteTreeprocessor to the Markdown instance. """
        processor = HiliteTreeprocessor(md)
        processor.config = self.getConfigs()
        md.treeprocessors.add("hilite", processor, "<inline")

        md.registerExtension(self)
| 262 | |
| 263 | |
def makeExtension(*args, **kwargs):
    """ Build a CodeHiliteExtension, forwarding every argument to it. """
    extension = CodeHiliteExtension(*args, **kwargs)
    return extension