blob: 9eed561fc0eff39714f9c9a4be0a8e3974a4e483 [file] [log] [blame]
dprankeb08af212015-10-06 17:44:36 -07001"""
2CodeHilite Extension for Python-Markdown
3========================================
4
5Adds code/syntax highlighting to standard Python-Markdown code blocks.
6
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +00007See <https://Python-Markdown.github.io/extensions/code_hilite>
dprankeb08af212015-10-06 17:44:36 -07008for documentation.
9
10Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
11
12All changes Copyright 2008-2014 The Python Markdown Project
13
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000014License: [BSD](https://opensource.org/licenses/bsd-license.php)
dprankeb08af212015-10-06 17:44:36 -070015
16"""
17
dprankeb08af212015-10-06 17:44:36 -070018from . import Extension
19from ..treeprocessors import Treeprocessor
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000020from ..util import parseBoolValue
dprankeb08af212015-10-06 17:44:36 -070021
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000022try: # pragma: no cover
dprankeb08af212015-10-06 17:44:36 -070023 from pygments import highlight
24 from pygments.lexers import get_lexer_by_name, guess_lexer
25 from pygments.formatters import get_formatter_by_name
26 pygments = True
Yu-Ping Wu6a8f3a22021-11-24 00:45:03 +000027except ImportError: # pragma: no cover
dprankeb08af212015-10-06 17:44:36 -070028 pygments = False
29
30
def parse_hl_lines(expr):
    """Translate a highlight-lines expression into a list of line numbers.

    `expr` is a whitespace-separated string of line numbers, e.g. '1 2' to
    emphasize lines 1 and 2 of a code block.

    Returns a list of ints. An empty or missing expression, or one holding a
    token that is not an integer, yields an empty list.
    """
    numbers = []
    if expr:
        try:
            numbers = [int(token) for token in expr.split()]
        except ValueError:  # pragma: no cover
            # A single bad token invalidates the whole expression.
            numbers = []
    return numbers
44
45
46# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
    """
    Work out the language of a piece of source code and hand it to the
    Pygments highlighter.

    Usage:
        code = CodeHilite(src=some_code, lang='python')
        html = code.hilite()

    Arguments:
    * src: Source string or any object with a .readline attribute.

    * lang: String name of Pygments lexer to use for highlighting. Default: `None`.

    * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
      value. Default: `True`.

    * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
      instead wrapped for highlighting by a JavaScript library. Default: `True`.

    * linenums: An alias to Pygments `linenos` formatter option. Default: `None`.

    * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.

    * lang_prefix: Prefix prepended to the language when `use_pygments` is `False`.
      Default: "language-".

    Other Options:
    Every remaining keyword is kept and handed to both the lexer and the formatter, so
    any option understood by the `html` formatter or by the lexer of the code's language
    may be given. Most lexers take no options, but a few have very useful ones, such as
    PHP's `startinline` option. Invalid options are ignored without error.

    Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
    Lexer Options: https://pygments.org/docs/lexers/

    Advanced Usage:
        code = CodeHilite(
            src = some_code,
            lang = 'php',
            startinline = True,      # Lexer option. Snippet does not start with `<?php`.
            linenostart = 42,        # Formatter option. Snippet starts on line 42.
            hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
            linenos = 'inline'       # Formatter option. Avoid alignment problems.
        )
        html = code.hilite()

    """

    def __init__(self, src, **options):
        self.src = src

        # Settings consumed by this class itself; they never reach Pygments.
        self.lang = options.pop('lang', None)
        self.guess_lang = options.pop('guess_lang', True)
        self.use_pygments = options.pop('use_pygments', True)
        self.lang_prefix = options.pop('lang_prefix', 'language-')

        # Map our aliases onto the Pygments option names. An alias is only
        # consumed when the real Pygments option was not given explicitly.
        for alias, pygments_name, fallback in (
            ('linenums', 'linenos', None),
            ('css_class', 'cssclass', 'codehilite'),
        ):
            if pygments_name not in options:
                options[pygments_name] = options.pop(alias, fallback)

        # Override pygments default
        options.setdefault('wrapcode', True)
        # Disallow use of `full` option
        options['full'] = False

        self.options = options

    def hilite(self):
        """
        Run the code through the [Pygments](http://pygments.pocoo.org/)
        highlighter, with optional line numbers. No styles are applied by
        default - the output only carries styling hooks
        (i.e.: <span class="k">) to be styled with css to your liking.

        When Pygments is unavailable or `use_pygments` is false, the code is
        HTML-escaped and wrapped in markup usable by JavaScript highlighting
        libraries instead.

        returns : A string of html.

        """

        self.src = self.src.strip('\n')

        # Without an explicit language, look for one in the first line.
        if self.lang is None:
            self._parseHeader()

        if pygments and self.use_pygments:
            try:
                lexer = get_lexer_by_name(self.lang, **self.options)
            except ValueError:
                # Unknown language: guess if allowed, else plain text.
                try:
                    lexer = (
                        guess_lexer(self.src, **self.options)
                        if self.guess_lang
                        else get_lexer_by_name('text', **self.options)
                    )
                except ValueError:  # pragma: no cover
                    lexer = get_lexer_by_name('text', **self.options)
            formatter = get_formatter_by_name('html', **self.options)
            return highlight(self.src, lexer, formatter)

        # just escape and build markup usable by JS highlighting libs
        escaped = self.src
        # '&' has to be handled first so the entities added afterwards
        # are not escaped a second time.
        for char, entity in (
            ('&', '&amp;'),
            ('<', '&lt;'),
            ('>', '&gt;'),
            ('"', '&quot;'),
        ):
            escaped = escaped.replace(char, entity)

        code_classes = []
        if self.lang:
            code_classes.append('{}{}'.format(self.lang_prefix, self.lang))
        if self.options['linenos']:
            code_classes.append('linenums')
        class_attr = ' class="{}"'.format(' '.join(code_classes)) if code_classes else ''

        return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
            self.options['cssclass'],
            class_attr,
            escaped
        )

    def _parseHeader(self):
        """
        Read the language of a code block from its first line and decide
        whether that line stays in the code.

        The first line may be a shebang (#!) or start with two or more
        colons. If it contains a path (even a single /) it is assumed to be a
        real shebang line and is left alone. If no path is given
        (i.e.: #!python or :::python) it is assumed to be a mock shebang used
        only for language identification and is removed from the code block
        before highlighting. When a mock shebang (i.e.: #!python) is found,
        line numbering is turned on unless it was set explicitly. When colons
        are used in place of a shebang (i.e.: :::python), line numbering is
        left in its current state - off by default.

        Also parses optional list of highlight lines, like:

            :::python hl_lines="1 3"
        """

        import re

        header_re = re.compile(r'''
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w#.+-]*)             # The language
            \s*                             # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            ''', re.VERBOSE)

        # Separate the first line from everything that follows it.
        first_line, _, remainder = self.src.partition("\n")

        # The first line stays unless it turns out to be a mock header.
        keep_first_line = True
        found = header_re.search(first_line)
        if found:
            try:
                self.lang = found.group('lang').lower()
            except IndexError:  # pragma: no cover
                self.lang = None
            # A path marks a real shebang line, which belongs to the code.
            keep_first_line = bool(found.group('path'))
            if self.options['linenos'] is None and found.group('shebang'):
                # Overridable and Shebang exists - use line numbers
                self.options['linenos'] = True

            self.options['hl_lines'] = parse_hl_lines(found.group('hl_lines'))

        code = self.src if keep_first_line else remainder
        self.src = code.strip("\n")
218
219
220# ------------------ The Markdown Extension -------------------------------
221
222
class HiliteTreeprocessor(Treeprocessor):
    """ Highlight source code in code blocks. """

    def code_unescape(self, text):
        """Return `text` with the `&lt;`, `&gt;` and `&amp;` entities decoded."""
        text = text.replace("&lt;", "<")
        text = text.replace("&gt;", ">")
        # Escaped '&' should be replaced at the end to avoid
        # conflicting with < and >.
        text = text.replace("&amp;", "&")
        return text

    def run(self, root):
        """
        Find code blocks, highlight them and store the result in htmlStash.

        Every `<pre>` element whose only child is a `<code>` element is
        replaced by a `<p>` element holding the stash placeholder of the
        highlighted HTML. `<code>` elements without any text are left as-is.
        """
        blocks = root.iter('pre')
        for block in blocks:
            if len(block) != 1 or block[0].tag != 'code':
                continue

            text = block[0].text
            if text is None:
                # An empty <code> element has nothing to highlight; calling
                # code_unescape on None would raise AttributeError.
                continue

            # Pop `pygments_style` from a per-block copy. Popping from
            # self.config itself would drop the key after the first block,
            # so every later block would fall back to the 'default' style.
            local_config = self.config.copy()
            code = CodeHilite(
                self.code_unescape(text),
                tab_length=self.md.tab_length,
                style=local_config.pop('pygments_style', 'default'),
                **local_config
            )
            placeholder = self.md.htmlStash.store(code.hilite())
            # Clear codeblock in etree instance
            block.clear()
            # Change to p element which will later
            # be removed when inserting raw html
            block.tag = 'p'
            block.text = placeholder
253
254
class CodeHiliteExtension(Extension):
    """ Add source code highlighting to markdown code blocks. """

    def __init__(self, **kwargs):
        """
        Set up the default configuration and apply the keyword overrides.

        Keywords matching a known option go through `setConfig`. Any other
        keyword is stored as an extra option with an empty description; a
        string value is first converted with `parseBoolValue`, and kept
        unchanged if that raises `ValueError`.
        """
        # Known options, each as [default value, description].
        self.config = {
            'linenums': [
                None,
                "Use lines numbers. True|table|inline=yes, False=no, None=auto"
            ],
            'guess_lang': [
                True,
                "Automatic language detection - Default: True"
            ],
            'css_class': [
                "codehilite",
                "Set class name for wrapper <div> - "
                "Default: codehilite"
            ],
            'pygments_style': [
                'default',
                'Pygments HTML Formatter Style '
                '(Colorscheme) - Default: default'
            ],
            'noclasses': [
                False,
                'Use inline styles instead of CSS classes - '
                'Default false'
            ],
            'use_pygments': [
                True,
                'Use Pygments to Highlight code blocks. '
                'Disable if using a JavaScript library. '
                'Default: True'
            ],
            'lang_prefix': [
                'language-',
                'Prefix prepended to the language when use_pygments is false. Default: "language-"'
            ]
        }

        for option, setting in kwargs.items():
            if option in self.config:
                self.setConfig(option, setting)
                continue

            # Unknown keyword: store it by hand.
            if isinstance(setting, str):
                try:
                    # Attempt to parse str as a bool value
                    setting = parseBoolValue(setting, preserve_none=True)
                except ValueError:
                    pass  # Assume it's not a bool value. Use as-is.
            self.config[option] = [setting, '']

    def extendMarkdown(self, md):
        """ Register a HiliteTreeprocessor with the Markdown instance. """
        processor = HiliteTreeprocessor(md)
        processor.config = self.getConfigs()
        md.treeprocessors.register(processor, 'hilite', 30)

        md.registerExtension(self)
304
305
def makeExtension(**kwargs):  # pragma: no cover
    """Build a CodeHiliteExtension, passing every keyword argument through."""
    extension = CodeHiliteExtension(**kwargs)
    return extension