dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 1 | """ |
| 2 | CodeHilite Extension for Python-Markdown |
| 3 | ======================================== |
| 4 | |
| 5 | Adds code/syntax highlighting to standard Python-Markdown code blocks. |
| 6 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 7 | See <https://Python-Markdown.github.io/extensions/code_hilite> |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 8 | for documentation. |
| 9 | |
| 10 | Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/). |
| 11 | |
| 12 | All changes Copyright 2008-2014 The Python Markdown Project |
| 13 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 14 | License: [BSD](https://opensource.org/licenses/bsd-license.php) |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 15 | |
| 16 | """ |
| 17 | |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 18 | from . import Extension |
| 19 | from ..treeprocessors import Treeprocessor |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 20 | from ..util import parseBoolValue |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 21 | |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 22 | try: # pragma: no cover |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 23 | from pygments import highlight |
| 24 | from pygments.lexers import get_lexer_by_name, guess_lexer |
| 25 | from pygments.formatters import get_formatter_by_name |
| 26 | pygments = True |
Yu-Ping Wu | 6a8f3a2 | 2021-11-24 00:45:03 +0000 | [diff] [blame^] | 27 | except ImportError: # pragma: no cover |
dpranke | b08af21 | 2015-10-06 17:44:36 -0700 | [diff] [blame] | 28 | pygments = False |
| 29 | |
| 30 | |
def parse_hl_lines(expr):
    """Parse the line-emphasis expression used in code block headers.

    `expr` is a whitespace-separated string of line numbers such as
    '1 2', meaning lines 1 and 2 of the code block are emphasized.

    Returns a list of ints. An empty or missing expression, or one holding
    a token that is not an integer, yields an empty list.
    """
    if not expr:
        return []

    try:
        line_numbers = [int(token) for token in expr.split()]
    except ValueError:  # pragma: no cover
        # A single malformed token discards the whole expression.
        return []
    return line_numbers
| 44 | |
| 45 | |
| 46 | # ------------------ The Main CodeHilite Class ---------------------- |
class CodeHilite:
    """
    Determine language of source code, and pass it on to the Pygments highlighter.

    Usage:
        code = CodeHilite(src=some_code, lang='python')
        html = code.hilite()

    Arguments:
    * src: Source string or any object with a .readline attribute.

    * lang: String name of Pygments lexer to use for highlighting. Default: `None`.

    * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
      value. Default: `True`.

    * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
      instead wrapped for highlighting by a JavaScript library. Default: `True`.

    * linenums: An alias to Pygments `linenos` formatter option. Default: `None`.

    * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.

    * lang_prefix: Prefix prepended to the language when `use_pygments` is `False`.
      Default: "language-".

    Other Options:
    Any other options are accepted and passed on to the lexer and formatter. Therefore,
    valid options include any options which are accepted by the `html` formatter or
    whichever lexer the code's language uses. Note that most lexers do not have any
    options. However, a few have very useful options, such as PHP's `startinline` option.
    Any invalid options are ignored without error.

    Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
    Lexer Options: https://pygments.org/docs/lexers/

    Advanced Usage:
        code = CodeHilite(
            src = some_code,
            lang = 'php',
            startinline = True,      # Lexer option. Snippet does not start with `<?php`.
            linenostart = 42,        # Formatter option. Snippet starts on line 42.
            hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
            linenos = 'inline'       # Formatter option. Avoid alignment problems.
        )
        html = code.hilite()

    """

    def __init__(self, src, **options):
        self.src = src
        self.lang = options.pop('lang', None)
        self.guess_lang = options.pop('guess_lang', True)
        self.use_pygments = options.pop('use_pygments', True)
        self.lang_prefix = options.pop('lang_prefix', 'language-')

        # `linenums` and `css_class` are aliases; the Pygments spelling wins
        # when both are supplied.
        if 'linenos' not in options:
            options['linenos'] = options.pop('linenums', None)
        if 'cssclass' not in options:
            options['cssclass'] = options.pop('css_class', 'codehilite')
        if 'wrapcode' not in options:
            # Override pygments default
            options['wrapcode'] = True
        # Disallow use of `full` option
        options['full'] = False

        self.options = options

    def hilite(self):
        """
        Pass code to the [Pygments](https://pygments.org/) highlighter with
        optional line numbers. The output should then be styled with css to
        your liking. No styles are applied by default - only styling hooks
        (i.e.: <span class="k">).

        When Pygments is unavailable or `use_pygments` is false, the source is
        HTML-escaped and wrapped in `<pre><code>` markup for a JavaScript
        highlighter instead.

        returns : A string of html.

        """

        self.src = self.src.strip('\n')

        if self.lang is None:
            self._parseHeader()

        if self.use_pygments and pygments:
            try:
                lexer = get_lexer_by_name(self.lang, **self.options)
            except ValueError:
                try:
                    if self.guess_lang:
                        lexer = guess_lexer(self.src, **self.options)
                    else:
                        lexer = get_lexer_by_name('text', **self.options)
                except ValueError:  # pragma: no cover
                    lexer = get_lexer_by_name('text', **self.options)
            formatter = get_formatter_by_name('html', **self.options)
            return highlight(self.src, lexer, formatter)
        else:
            # just escape and build markup usable by JS highlighting libs
            # '&' must be escaped first so the entities produced by the
            # following replacements are not escaped a second time.
            txt = self.src.replace('&', '&amp;')
            txt = txt.replace('<', '&lt;')
            txt = txt.replace('>', '&gt;')
            txt = txt.replace('"', '&quot;')
            classes = []
            if self.lang:
                classes.append('{}{}'.format(self.lang_prefix, self.lang))
            if self.options['linenos']:
                classes.append('linenums')
            class_str = ''
            if classes:
                class_str = ' class="{}"'.format(' '.join(classes))
            return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
                self.options['cssclass'],
                class_str,
                txt
            )

    def _parseHeader(self):
        """
        Determines language of a code block from shebang line and whether the
        said line should be removed or left in place. If the shebang line
        contains a path (even a single /) then it is assumed to be a real
        shebang line and left alone. However, if no path is given
        (i.e.: #!python or :::python) then it is assumed to be a mock shebang
        for language identification of a code fragment and removed from the
        code block prior to processing for code highlighting. When a mock
        shebang (i.e.: #!python) is found, line numbering is turned on. When
        colons are found in place of a shebang (i.e.: :::python), line
        numbering is left in the current state - off by default.

        Also parses optional list of highlight lines, like:

            :::python hl_lines="1 3"
        """

        import re

        # split text into lines
        lines = self.src.split("\n")
        # pull first line to examine
        fl = lines.pop(0)

        c = re.compile(r'''
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w#.+-]*)             # The language
            \s*                             # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            ''', re.VERBOSE)
        # search first line for shebang
        m = c.search(fl)
        if m:
            # we have a match
            try:
                self.lang = m.group('lang').lower()
            except IndexError:  # pragma: no cover
                self.lang = None
            if m.group('path'):
                # path exists - restore first line
                lines.insert(0, fl)
            if self.options['linenos'] is None and m.group('shebang'):
                # Overridable and Shebang exists - use line numbers
                self.options['linenos'] = True

            self.options['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
        else:
            # No match
            lines.insert(0, fl)

        self.src = "\n".join(lines).strip("\n")
| 218 | |
| 219 | |
| 220 | # ------------------ The Markdown Extension ------------------------------- |
| 221 | |
| 222 | |
class HiliteTreeprocessor(Treeprocessor):
    """ Highlight source code in code blocks. """

    def code_unescape(self, text):
        """Reverse the HTML escaping of `<`, `>` and `&` in code block text.

        Returns the unescaped string, ready to be handed to the highlighter.
        """
        text = text.replace("&lt;", "<")
        text = text.replace("&gt;", ">")
        # Escaped '&' should be replaced at the end to avoid
        # conflicting with &lt; and &gt;.
        text = text.replace("&amp;", "&")
        return text

    def run(self, root):
        """ Find code blocks and store in htmlStash.

        Every `<pre>` element whose only child is a `<code>` element is
        highlighted, stashed as raw HTML, and replaced in the tree by a
        `<p>` holding the stash placeholder.
        """
        blocks = root.iter('pre')
        for block in blocks:
            if len(block) == 1 and block[0].tag == 'code':
                text = block[0].text
                if text is None:
                    # Empty <code> element: nothing to highlight.
                    continue
                # Work on a copy so that popping `pygments_style` does not
                # strip the setting from the shared config; otherwise only
                # the first code block would receive the configured style.
                local_config = self.config.copy()
                code = CodeHilite(
                    self.code_unescape(text),
                    tab_length=self.md.tab_length,
                    style=local_config.pop('pygments_style', 'default'),
                    **local_config
                )
                placeholder = self.md.htmlStash.store(code.hilite())
                # Clear codeblock in etree instance
                block.clear()
                # Change to p element which will later
                # be removed when inserting raw html
                block.tag = 'p'
                block.text = placeholder
| 253 | |
| 254 | |
class CodeHiliteExtension(Extension):
    """ Add source code highlighting to markdown codeblocks. """

    def __init__(self, **kwargs):
        """Set up the default options, then apply the caller's keywords.

        Keywords matching a default option go through `setConfig`. Any other
        keyword is stored as an extra option; string values are first offered
        to `parseBoolValue` and kept unchanged if that raises `ValueError`.
        """
        # define default configs
        self.config = {
            'linenums': [None,
                         "Use lines numbers. True|table|inline=yes, False=no, None=auto"],
            'guess_lang': [True,
                           "Automatic language detection - Default: True"],
            'css_class': ["codehilite",
                          "Set class name for wrapper <div> - "
                          "Default: codehilite"],
            'pygments_style': ['default',
                               'Pygments HTML Formatter Style '
                               '(Colorscheme) - Default: default'],
            'noclasses': [False,
                          'Use inline styles instead of CSS classes - '
                          'Default false'],
            'use_pygments': [True,
                             'Use Pygments to Highlight code blocks. '
                             'Disable if using a JavaScript library. '
                             'Default: True'],
            'lang_prefix': [
                'language-',
                'Prefix prepended to the language when use_pygments is false. Default: "language-"'
            ]
        }

        for name, setting in kwargs.items():
            if name in self.config:
                self.setConfig(name, setting)
                continue

            # manually set unknown keywords.
            if isinstance(setting, str):
                try:
                    # Attempt to parse str as a bool value
                    setting = parseBoolValue(setting, preserve_none=True)
                except ValueError:
                    pass  # Assume it's not a bool value. Use as-is.
            self.config[name] = [setting, '']

    def extendMarkdown(self, md):
        """ Add HiliteTreeprocessor to Markdown instance. """
        processor = HiliteTreeprocessor(md)
        processor.config = self.getConfigs()
        md.treeprocessors.register(processor, 'hilite', 30)

        md.registerExtension(self)
| 304 | |
| 305 | |
def makeExtension(**kwargs):  # pragma: no cover
    """Build a `CodeHiliteExtension`, forwarding all keyword arguments to it."""
    extension = CodeHiliteExtension(**kwargs)
    return extension