blob: a54ba21c01d232b72a5d820a8b2f9211674be279 [file] [log] [blame]
dprankeb08af212015-10-06 17:44:36 -07001"""
2CodeHilite Extension for Python-Markdown
3========================================
4
5Adds code/syntax highlighting to standard Python-Markdown code blocks.
6
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +00007See <https://Python-Markdown.github.io/extensions/code_hilite>
dprankeb08af212015-10-06 17:44:36 -07008for documentation.
9
10Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
11
12All changes Copyright 2008-2014 The Python Markdown Project
13
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000014License: [BSD](https://opensource.org/licenses/bsd-license.php)
dprankeb08af212015-10-06 17:44:36 -070015
16"""
17
dprankeb08af212015-10-06 17:44:36 -070018from . import Extension
19from ..treeprocessors import Treeprocessor
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000020from ..util import parseBoolValue
dprankeb08af212015-10-06 17:44:36 -070021
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000022try: # pragma: no cover
dprankeb08af212015-10-06 17:44:36 -070023 from pygments import highlight
24 from pygments.lexers import get_lexer_by_name, guess_lexer
25 from pygments.formatters import get_formatter_by_name
揚帆起航0f447352022-11-28 22:32:11 +000026 from pygments.util import ClassNotFound
dprankeb08af212015-10-06 17:44:36 -070027 pygments = True
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000028except ImportError: # pragma: no cover
dprankeb08af212015-10-06 17:44:36 -070029 pygments = False
30
31
def parse_hl_lines(expr):
    """Parse the line-emphasis expression used in code block headers.

    `expr` is a whitespace-separated list of line numbers, e.g. `'1 2'` to
    emphasize lines 1 and 2 of a code block.

    Returns a list of ints. An empty (or `None`) expression, or one holding
    any token which is not an integer, yields an empty list.
    """
    if expr:
        try:
            return [int(token) for token in expr.split()]
        except ValueError:  # pragma: no cover
            # At least one token is not a number: emphasize nothing.
            pass
    return []
45
46
47# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
    """
    Determine language of source code, and pass it on to the Pygments highlighter.

    Usage:
        code = CodeHilite(src=some_code, lang='python')
        html = code.hilite()

    Arguments:
    * src: Source string. It must be a `str`, as it is stripped and split
        into lines before highlighting.

    * lang: String name of Pygments lexer to use for highlighting. Default: `None`.

    * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
        value. Default: `True`.

    * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
        instead wrapped for highlighting by a JavaScript library. Default: `True`.

    * pygments_formatter: The name of a Pygments formatter or a formatter class used for
        highlighting the code blocks. Default: `html`.

    * linenums: An alias to Pygments `linenos` formatter option. Default: `None`.

    * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.

    * lang_prefix: Prefix prepended to the language. Default: "language-".

    Other Options:
    Any other options are accepted and passed on to the lexer and formatter. Therefore,
    valid options include any options which are accepted by the `html` formatter or
    whichever lexer the code's language uses. Note that most lexers do not have any
    options. However, a few have very useful options, such as PHP's `startinline` option.
    Any invalid options are ignored without error.

    Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
    Lexer Options: https://pygments.org/docs/lexers/

    Additionally, when Pygments is enabled and `pygments_formatter` is a formatter
    class (rather than a formatter name), the code's language is passed to that class
    as an extra option `lang_str`, whose value is `{lang_prefix}{lang}`.

    Advanced Usage:
        code = CodeHilite(
            src = some_code,
            lang = 'php',
            startinline = True,      # Lexer option. Snippet does not start with `<?php`.
            linenostart = 42,        # Formatter option. Snippet starts on line 42.
            hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
            linenos = 'inline'       # Formatter option. Avoid alignment problems.
        )
        html = code.hilite()

    """

    def __init__(self, src, **options):
        """Store the source and split `options` into own settings and Pygments options."""
        self.src = src
        self.lang = options.pop('lang', None)
        self.guess_lang = options.pop('guess_lang', True)
        self.use_pygments = options.pop('use_pygments', True)
        self.lang_prefix = options.pop('lang_prefix', 'language-')
        self.pygments_formatter = options.pop('pygments_formatter', 'html')

        # The Pygments spelling of an option wins over its alias when both are given.
        if 'linenos' not in options:
            options['linenos'] = options.pop('linenums', None)
        if 'cssclass' not in options:
            options['cssclass'] = options.pop('css_class', 'codehilite')
        if 'wrapcode' not in options:
            # Override pygments default
            options['wrapcode'] = True
        # Disallow use of `full` option
        options['full'] = False

        self.options = options

    def hilite(self, shebang=True):
        """
        Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter with
        optional line numbers. The output should then be styled with css to
        your liking. No styles are applied by default - only styling hooks
        (i.e.: <span class="k">).

        If `shebang` is true and no language was given, the first line of the
        source is inspected for a (mock) shebang which names the language.

        returns : A string of html.

        """

        self.src = self.src.strip('\n')

        if self.lang is None and shebang:
            self._parseHeader()

        if pygments and self.use_pygments:
            try:
                lexer = get_lexer_by_name(self.lang, **self.options)
            except ValueError:
                # Unknown (or missing) language: guess it, or fall back to plain text.
                try:
                    if self.guess_lang:
                        lexer = guess_lexer(self.src, **self.options)
                    else:
                        lexer = get_lexer_by_name('text', **self.options)
                except ValueError:  # pragma: no cover
                    lexer = get_lexer_by_name('text', **self.options)
            if not self.lang:
                # Use the guessed lexer's language instead
                self.lang = lexer.aliases[0]
            lang_str = f'{self.lang_prefix}{self.lang}'
            if isinstance(self.pygments_formatter, str):
                try:
                    formatter = get_formatter_by_name(self.pygments_formatter, **self.options)
                except ClassNotFound:
                    # Unknown formatter name: fall back to the `html` formatter.
                    formatter = get_formatter_by_name('html', **self.options)
            else:
                formatter = self.pygments_formatter(lang_str=lang_str, **self.options)
            return highlight(self.src, lexer, formatter)
        else:
            # just escape and build markup usable by JS highlighting libs
            txt = self.src.replace('&', '&amp;')
            txt = txt.replace('<', '&lt;')
            txt = txt.replace('>', '&gt;')
            txt = txt.replace('"', '&quot;')
            classes = []
            if self.lang:
                classes.append('{}{}'.format(self.lang_prefix, self.lang))
            if self.options['linenos']:
                classes.append('linenums')
            class_str = ''
            if classes:
                class_str = ' class="{}"'.format(' '.join(classes))
            return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
                self.options['cssclass'],
                class_str,
                txt
            )

    def _parseHeader(self):
        """
        Determines language of a code block from shebang line and whether the
        said line should be removed or left in place. If the shebang line
        contains a path (even a single /) then it is assumed to be a real
        shebang line and left alone. However, if no path is given
        (i.e.: #!python or :::python) then it is assumed to be a mock shebang
        for language identification of a code fragment and removed from the
        code block prior to processing for code highlighting. When a mock
        shebang (i.e.: #!python) is found, line numbering is turned on. When
        colons are found in place of a shebang (i.e.: :::python), line
        numbering is left in the current state - off by default.

        Also parses optional list of highlight lines, like:

            :::python hl_lines="1 3"

        An `hl_lines` option given to the constructor is only replaced when
        the header line actually carries an `hl_lines=...` clause.
        """

        import re

        # split text into lines
        lines = self.src.split("\n")
        # pull first line to examine
        fl = lines.pop(0)

        c = re.compile(r'''
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w#.+-]*)             # The language
            \s*                             # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            ''', re.VERBOSE)
        # search first line for shebang
        m = c.search(fl)
        if m:
            # we have a match; the `lang` group always participates in a match
            # (it may be empty), so it is never `None` here.
            self.lang = m.group('lang').lower()
            if m.group('path'):
                # path exists - restore first line
                lines.insert(0, fl)
            if self.options['linenos'] is None and m.group('shebang'):
                # Overridable and Shebang exists - use line numbers
                self.options['linenos'] = True

            hl_lines = m.group('hl_lines')
            if hl_lines is not None:
                # The header explicitly lists lines to emphasize.
                self.options['hl_lines'] = parse_hl_lines(hl_lines)
            else:
                # No list in the header: keep whatever the caller passed in.
                self.options.setdefault('hl_lines', [])
        else:
            # No match
            lines.insert(0, fl)

        self.src = "\n".join(lines).strip("\n")
236
237
238# ------------------ The Markdown Extension -------------------------------
239
240
class HiliteTreeprocessor(Treeprocessor):
    """ Highlight source code in code blocks. """

    def code_unescape(self, text):
        """Unescape code: turn `&lt;`, `&gt;` and `&amp;` back into `<`, `>` and `&`."""
        text = text.replace("&lt;", "<")
        text = text.replace("&gt;", ">")
        # Escaped '&' should be replaced at the end to avoid
        # conflicting with < and >.
        text = text.replace("&amp;", "&")
        return text

    def run(self, root):
        """ Find code blocks, highlight them and store the result in htmlStash. """
        # Snapshot the matches first: the loop body mutates the tree
        # (`block.clear()`), which must not happen while a live iterator
        # is walking it.
        for block in list(root.iter('pre')):
            if len(block) == 1 and block[0].tag == 'code':
                text = block[0].text
                if text is None:
                    # An empty `<code>` element has nothing to highlight;
                    # leave it alone instead of failing on `None.replace`.
                    continue
                local_config = self.config.copy()
                code = CodeHilite(
                    self.code_unescape(text),
                    tab_length=self.md.tab_length,
                    style=local_config.pop('pygments_style', 'default'),
                    **local_config
                )
                placeholder = self.md.htmlStash.store(code.hilite())
                # Clear codeblock in etree instance
                block.clear()
                # Change to p element which will later
                # be removed when inserting raw html
                block.tag = 'p'
                block.text = placeholder
272
273
class CodeHiliteExtension(Extension):
    """ Add source code highlighting to markdown codeblocks. """

    def __init__(self, **kwargs):
        """Define the default configuration and apply the keyword overrides.

        Known keys are set through `setConfig`. Unknown keys are stored as
        well, so that they are available as additional options; string values
        of unknown keys are converted to booleans when they parse as one.
        """
        # define default configs
        self.config = {
            'linenums': [None,
                         "Use lines numbers. True|table|inline=yes, False=no, None=auto"],
            'guess_lang': [True,
                           "Automatic language detection - Default: True"],
            'css_class': ["codehilite",
                          "Set class name for wrapper <div> - "
                          "Default: codehilite"],
            'pygments_style': ['default',
                               'Pygments HTML Formatter Style '
                               '(Colorscheme) - Default: default'],
            'noclasses': [False,
                          'Use inline styles instead of CSS classes - '
                          'Default false'],
            'use_pygments': [True,
                             'Use Pygments to Highlight code blocks. '
                             'Disable if using a JavaScript library. '
                             'Default: True'],
            'lang_prefix': [
                'language-',
                'Prefix prepended to the language when use_pygments is false. Default: "language-"'
            ],
            'pygments_formatter': ['html',
                                   'Use a specific formatter for Pygments highlighting. '
                                   'Default: "html"',
                                   ],
            }

        for key, value in kwargs.items():
            if key in self.config:
                self.setConfig(key, value)
            else:
                # manually set unknown keywords.
                if isinstance(value, str):
                    try:
                        # Attempt to parse str as a bool value
                        value = parseBoolValue(value, preserve_none=True)
                    except ValueError:
                        pass  # Assume it's not a bool value. Use as-is.
                self.config[key] = [value, '']

    def extendMarkdown(self, md):
        """ Add HiliteTreeprocessor to Markdown instance. """
        hiliter = HiliteTreeprocessor(md)
        hiliter.config = self.getConfigs()
        md.treeprocessors.register(hiliter, 'hilite', 30)

        md.registerExtension(self)
327
328
def makeExtension(**kwargs):  # pragma: no cover
    """Create a `CodeHiliteExtension`, forwarding every keyword argument to it."""
    extension = CodeHiliteExtension(**kwargs)
    return extension