Blame - markdown/inlinepatterns.py - chromium.googlesource.com/chromium/src/third_party/Python-Markdown

blob: b0621a8287b68eea8dfee39876e1f23ccbce98d2 [file] [log] [blame]

dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	1	"""
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	2	Python Markdown
				3
				4	A Python implementation of John Gruber's Markdown.
				5
				6	Documentation: https://python-markdown.github.io/
				7	GitHub: https://github.com/Python-Markdown/markdown/
				8	PyPI: https://pypi.org/project/Markdown/
				9
				10	Started by Manfred Stienstra (http://www.dwerg.net/).
				11	Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
				12	Currently maintained by Waylan Limberg (https://github.com/waylan),
				13	Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
				14
				15	Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
				16	Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
				17	Copyright 2004 Manfred Stienstra (the original version)
				18
				19	License: BSD (see LICENSE.md for details).
				20
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	21	INLINE PATTERNS
				22	=============================================================================
				23
				24	Inline patterns such as emphasis are handled by means of auxiliary
				25	objects, one per pattern. Pattern objects must be instances of classes
				26	that extend markdown.Pattern. Each pattern object uses a single regular
				27	expression and needs to support the following methods:
				28
				29	pattern.getCompiledRegExp() # returns a regular expression
				30
				31	pattern.handleMatch(m) # takes a match object and returns
				32	# an ElementTree element or just plain text
				33
				34	All of python markdown's built-in patterns subclass from Pattern,
				35	but you can add additional patterns that don't.
				36
				37	Also note that all the regular expressions used by inline must
				38	capture the whole block. For this reason, they all start with
				39	'^(.*)' and end with '(.*)!'. For the built-in expressions,
				40	Pattern takes care of adding the "^(.*)" and "(.*)!".
				41
				42	Finally, the order in which regular expressions are applied is very
				43	important - e.g. if we first replace http://.../ links with <a> tags
				44	and _then_ try to replace inline html, we would end up with a mess.
				45	So, we apply the expressions in the following order:
				46
				47	* escape and backticks have to go before everything else, so
				48	that we can preempt any markdown patterns by escaping them.
				49
				50	* then we handle auto-links (must be done before inline html)
				51
				52	* then we handle inline HTML. At this point we will simply
				53	replace all inline HTML strings with a placeholder and add
				54	the actual HTML to a hash.
				55
				56	* then inline images (must be done before links)
				57
				58	* then bracketed links, first regular then reference-style
				59
				60	* finally we apply strong and emphasis
				61	"""
				62
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	63	from . import util
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	64	from collections import namedtuple
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	65	import re
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	66	import xml.etree.ElementTree as etree
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	67	try: # pragma: no cover
				68	from html import entities
				69	except ImportError: # pragma: no cover
				70	import htmlentitydefs as entities
				71
				72
def build_inlinepatterns(md, **kwargs):
    """Create the registry holding Markdown's default inline processors.

    Each processor is registered under a name with a numeric priority; the
    names and priorities below are the library defaults. `md` is handed to
    every processor that takes a Markdown instance. Extra keyword arguments
    are accepted but not used.
    """
    registry = util.Registry()
    defaults = (
        (BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190),
        (EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180),
        (ReferenceInlineProcessor(REFERENCE_RE, md), 'reference', 170),
        (LinkInlineProcessor(LINK_RE, md), 'link', 160),
        (ImageInlineProcessor(IMAGE_LINK_RE, md), 'image_link', 150),
        (ImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'image_reference', 140),
        (ShortReferenceInlineProcessor(REFERENCE_RE, md), 'short_reference', 130),
        (ShortImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'short_image_ref', 125),
        (AutolinkInlineProcessor(AUTOLINK_RE, md), 'autolink', 120),
        (AutomailInlineProcessor(AUTOMAIL_RE, md), 'automail', 110),
        (SubstituteTagInlineProcessor(LINE_BREAK_RE, 'br'), 'linebreak', 100),
        (HtmlInlineProcessor(HTML_RE, md), 'html', 90),
        (HtmlInlineProcessor(ENTITY_RE, md), 'entity', 80),
        (SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 70),
        (AsteriskProcessor(r'\*'), 'em_strong', 60),
        (UnderscoreProcessor(r'_'), 'em_strong2', 50),
    )
    for processor, name, priority in defaults:
        registry.register(processor, name, priority)
    return registry
				99
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	100
"""
The actual regular expressions for patterns
-----------------------------------------------------------------------------
"""

# Negative look-behind: the following `[` must not be preceded by `!`
# (which would make it an image rather than a link).
NOIMG = r'(?<!\!)'

# `e=f()` or ``e=f("`")``
# Group 1: an even run of backslashes directly before backticks.
# Groups 2/3: the opening backtick run and the code span content.
BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\2(?!`))'

# \<
ESCAPE_RE = r'\\(.)'

# *emphasis*
EMPHASIS_RE = r'(\*)([^\*]+)\1'

# **strong**
STRONG_RE = r'(\*{2})(.+?)\1'

# __smart__strong__
SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\1(?!\w)'

# _smart_emphasis_
SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)'

# __strong _em__
SMART_STRONG_EM_RE = r'(?<!\w)(\_)\1(?!\1)(.+?)(?<!\w)\1(?!\1)(.+?)\1{3}(?!\w)'

# ***strongem*** or ***em*strong**
EM_STRONG_RE = r'(\*)\1{2}(.+?)\1(.*?)\1{2}'

# ___strongem___ or ___em_strong__
EM_STRONG2_RE = r'(_)\1{2}(.+?)\1(.*?)\1{2}'

# ***strong**em*
STRONG_EM_RE = r'(\*)\1{2}(.+?)\1{2}(.*?)\1'

# ___strong__em_
STRONG_EM2_RE = r'(_)\1{2}(.+?)\1{2}(.*?)\1'

# **strong*em***
STRONG_EM3_RE = r'(\*)\1(?!\1)([^*]+?)\1(?!\1)(.+?)\1{3}'

# [text](url) or [text](<url>) or [text](url "title")
# Only the opening bracket is matched here.
LINK_RE = NOIMG + r'\['

# ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
# Only the opening `![` is matched here.
IMAGE_LINK_RE = r'\!\['

# [Google][3]
REFERENCE_RE = LINK_RE

# ![alt text][2]
IMAGE_REFERENCE_RE = IMAGE_LINK_RE

# stand-alone * or _
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'

# <http://www.123.com>
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'

# <me@example.com>
AUTOMAIL_RE = r'<([^<> !]+@[^@<> ]+)>'

# <...>
HTML_RE = r'(<([a-zA-Z/][^<>]*|!--(?:(?!<!--|-->).)*--)>)'

# "&#38;" (decimal) or "&#x26;" (hex) or "&amp;" (named)
ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)'

# two spaces at end of line
LINE_BREAK_RE = r'  \n'
				173
				174
				175	def dequote(string):
				176	"""Remove quotes from around a string."""
				177	if ((string.startswith('"') and string.endswith('"')) or
				178	(string.startswith("'") and string.endswith("'"))):
				179	return string[1:-1]
				180	else:
				181	return string
				182
				183
class EmStrongItem(namedtuple('EmStrongItem', ['pattern', 'builder', 'tags'])):
    """One emphasis/strong rule: a `pattern`, a `builder` name and the `tags` to emit."""
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	186
				187
				188	"""
				189	The pattern classes
				190	-----------------------------------------------------------------------------
				191	"""
				192
				193
class Pattern:  # pragma: no cover
    """Base class that (legacy style) inline patterns subclass.

    The supplied expression is wrapped so that it matches the whole block:
    group 1 is the text before the pattern and the last group is the text
    after it.
    """

    ANCESTOR_EXCLUDES = tuple()

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline pattern.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: Optional object stored on the instance as `self.md`.

        """
        self.pattern = pattern
        # Non-greedy prefix and greedy suffix so the first occurrence of the
        # pattern in the block is the one that gets matched.
        self.compiled_re = re.compile(r"^(.*?)%s(.*)$" % pattern,
                                      re.DOTALL | re.UNICODE)

        self.md = md

    @property
    @util.deprecated("Use 'md' instead.")
    def markdown(self):
        # TODO: remove this later
        return self.md

    def getCompiledRegExp(self):
        """ Return a compiled regular expression. """
        return self.compiled_re

    def handleMatch(self, m):
        """Return a ElementTree element from the given match.

        Subclasses should override this method.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.

        """
        pass  # pragma: no cover

    def type(self):
        """ Return class name, to define pattern type """
        return self.__class__.__name__

    def unescape(self, text):
        """ Return unescaped text given text with an inline placeholder.

        Placeholders found by `util.INLINE_PLACEHOLDER_RE` are looked up in
        the inline tree processor's `stashed_nodes`. A stashed string is
        substituted as is; any other stashed value is replaced by its
        concatenated `itertext()`. If there is no 'inline' tree processor
        the text is returned untouched.
        """
        try:
            stash = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def get_stash(m):
            placeholder_id = m.group(1)
            if placeholder_id in stash:
                value = stash.get(placeholder_id)
                if isinstance(value, str):
                    return value
                else:
                    # An etree Element - return text content only
                    return ''.join(value.itertext())
        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
				257
				258
class InlineProcessor(Pattern):
    """
    Base class that inline patterns subclass.

    This is the newer style inline processor that uses a more
    efficient and flexible search approach.
    """

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline pattern.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: Optional object stored on the instance as `self.md`.

        """
        self.pattern = pattern
        # Unlike `Pattern`, the expression is compiled as given (it is not
        # wrapped in whole-block prefix/suffix groups).
        self.compiled_re = re.compile(pattern, re.DOTALL | re.UNICODE)

        # Api for Markdown to pass safe_mode into instance
        self.safe_mode = False
        self.md = md

    def handleMatch(self, m, data):
        """Return a ElementTree element from the given match and the
        start and end index of the matched text.

        If `start` and/or `end` are returned as `None`, it will be
        assumed that the processor did not find a valid region of text.

        Subclasses should override this method.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.
        * data: The buffer current under analysis

        Returns:

        * el: The ElementTree element, text or None.
        * start: The start of the region that has been matched or None.
        * end: The end of the region that has been matched or None.

        """
        pass  # pragma: no cover
				305
				306
				307	class SimpleTextPattern(Pattern): # pragma: no cover
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	308	""" Return a simple text of group(2) of a Pattern. """
				309	def handleMatch(self, m):
				310	return m.group(2)
				311
				312
class SimpleTextInlineProcessor(InlineProcessor):
    """ Inline processor whose result is the plain text captured in group(1). """

    def handleMatch(self, m, data):
        begin, finish = m.start(0), m.end(0)
        return m.group(1), begin, finish
				317
				318
				319	class EscapeInlineProcessor(InlineProcessor):
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	320	""" Return an escaped character. """
				321
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	322	def handleMatch(self, m, data):
				323	char = m.group(1)
				324	if char in self.md.ESCAPED_CHARS:
				325	return '{}{}{}'.format(util.STX, ord(char), util.ETX), m.start(0), m.end(0)
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	326	else:
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	327	return None, m.start(0), m.end(0)
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	328
				329
class SimpleTagPattern(Pattern):  # pragma: no cover
    """
    Legacy pattern producing a `tag` element whose text is group(3)
    of the match.

    """

    def __init__(self, pattern, tag):
        Pattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m):
        element = etree.Element(self.tag)
        element.text = m.group(3)
        return element
				344
				345
class SimpleTagInlineProcessor(InlineProcessor):
    """
    Inline processor producing a `tag` element whose text is group(2)
    of the match.

    """

    def __init__(self, pattern, tag):
        InlineProcessor.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m, data):  # pragma: no cover
        element = etree.Element(self.tag)
        element.text = m.group(2)
        begin, finish = m.start(0), m.end(0)
        return element, begin, finish
				360
				361
				362	class SubstituteTagPattern(SimpleTagPattern): # pragma: no cover
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	363	""" Return an element of type `tag` with no children. """
				364	def handleMatch(self, m):
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	365	return etree.Element(self.tag)
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	366
				367
class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
    """ Inline processor that swaps the match for an empty `tag` element. """

    def handleMatch(self, m, data):
        empty_element = etree.Element(self.tag)
        return empty_element, m.start(0), m.end(0)
				372
				373
				374	class BacktickInlineProcessor(InlineProcessor):
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	375	""" Return a `<code>` element containing the matching text. """
				376	def __init__(self, pattern):
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	377	InlineProcessor.__init__(self, pattern)
				378	self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, ord('\\'), util.ETX)
				379	self.tag = 'code'
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	380
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	381	def handleMatch(self, m, data):
				382	if m.group(3):
				383	el = etree.Element(self.tag)
				384	el.text = util.AtomicString(util.code_escape(m.group(3).strip()))
				385	return el, m.start(0), m.end(0)
				386	else:
				387	return m.group(1).replace('\\\\', self.ESCAPED_BSLASH), m.start(0), m.end(0)
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	388
				389
class DoubleTagPattern(SimpleTagPattern):  # pragma: no cover
    """Legacy pattern building a `tag2` element nested inside `tag1`.

    `self.tag` holds both tag names separated by a comma. Useful for
    strong emphasis etc.

    """

    def handleMatch(self, m):
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(3)
        # With five groups in the match, group(4) becomes the inner tail.
        if len(m.groups()) == 5:
            inner.tail = m.group(4)
        return outer
				404
				405
class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
    """Inline processor building a `tag2` element nested inside `tag1`.

    `self.tag` holds both tag names separated by a comma. Useful for
    strong emphasis etc.

    """

    def handleMatch(self, m, data):  # pragma: no cover
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(2)
        # With three groups in the match, group(3) becomes the inner tail.
        if len(m.groups()) == 3:
            inner.tail = m.group(3)
        return outer, m.start(0), m.end(0)
				420
				421
				422	class HtmlInlineProcessor(InlineProcessor):
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	423	""" Store raw inline html and return a placeholder. """
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	424	def handleMatch(self, m, data):
				425	rawhtml = self.unescape(m.group(1))
				426	place_holder = self.md.htmlStash.store(rawhtml)
				427	return place_holder, m.start(0), m.end(0)
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	428
				429	def unescape(self, text):
				430	""" Return unescaped text given text with an inline placeholder. """
				431	try:
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	432	stash = self.md.treeprocessors['inline'].stashed_nodes
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	433	except KeyError: # pragma: no cover
				434	return text
				435
				436	def get_stash(m):
				437	id = m.group(1)
				438	value = stash.get(id)
				439	if value is not None:
				440	try:
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	441	return self.md.serializer(value)
				442	except Exception:
				443	return r'\%s' % value
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	444
				445	return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
				446
				447
class AsteriskProcessor(InlineProcessor):
    """Emphasis processor for handling strong and em matches inside asterisks."""

    # Tried in order; the first pattern that matches wins in `handleMatch`.
    PATTERNS = [
        EmStrongItem(re.compile(EM_STRONG_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
        EmStrongItem(re.compile(STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
        EmStrongItem(re.compile(STRONG_EM3_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
        EmStrongItem(re.compile(STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'),
        EmStrongItem(re.compile(EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em')
    ]

    def build_single(self, m, tag, idx):
        """Return single tag.

        The element's content is group(2) of `m`, parsed for nested
        patterns that come after index `idx` in `PATTERNS`.
        """
        el1 = etree.Element(tag)
        text = m.group(2)
        self.parse_sub_patterns(text, el1, None, idx)
        return el1

    def build_double(self, m, tags, idx):
        """Return double tag.

        `tags` is "outer,inner". Group(2) fills the inner element; when the
        match has three groups, group(3) is parsed into the outer element
        after the inner one.
        """

        tag1, tag2 = tags.split(",")
        el1 = etree.Element(tag1)
        el2 = etree.Element(tag2)
        text = m.group(2)
        self.parse_sub_patterns(text, el2, None, idx)
        el1.append(el2)
        if len(m.groups()) == 3:
            text = m.group(3)
            self.parse_sub_patterns(text, el1, el2, idx)
        return el1

    def build_double2(self, m, tags, idx):
        """Return double tags (variant 2): `<strong>text <em>text</em></strong>`."""

        tag1, tag2 = tags.split(",")
        el1 = etree.Element(tag1)
        el2 = etree.Element(tag2)
        text = m.group(2)
        self.parse_sub_patterns(text, el1, None, idx)
        text = m.group(3)
        el1.append(el2)
        self.parse_sub_patterns(text, el2, None, idx)
        return el1

    def parse_sub_patterns(self, data, parent, last, idx):
        """
        Parses sub patterns.

        `data` (`str`):
            text to evaluate.

        `parent` (`etree.Element`):
            Parent to attach text and sub elements to.

        `last` (`etree.Element`):
            Last appended child to parent. Can also be None if parent has no children.

        `idx` (`int`):
            Current pattern index that was used to evaluate the parent.

        """

        offset = 0
        pos = 0

        length = len(data)
        while pos < length:
            # Find the start of potential emphasis or strong tokens
            if self.compiled_re.match(data, pos):
                matched = False
                # See if the we can match an emphasis/strong pattern
                for index, item in enumerate(self.PATTERNS):
                    # Only evaluate patterns that are after what was used on the parent
                    if index <= idx:
                        continue
                    m = item.pattern.match(data, pos)
                    if m:
                        # Append child nodes to parent
                        # Text nodes should be appended to the last
                        # child if present, and if not, it should
                        # be added as the parent's text node.
                        text = data[offset:m.start(0)]
                        if text:
                            if last is not None:
                                last.tail = text
                            else:
                                parent.text = text
                        el = self.build_element(m, item.builder, item.tags, index)
                        parent.append(el)
                        last = el
                        # Move our position past the matched hunk
                        offset = pos = m.end(0)
                        matched = True
                if not matched:
                    # We matched nothing, move on to the next character
                    pos += 1
            else:
                # Increment position as no potential emphasis start was found.
                pos += 1

        # Append any leftover text as a text node.
        text = data[offset:]
        if text:
            if last is not None:
                last.tail = text
            else:
                parent.text = text

    def build_element(self, m, builder, tags, index):
        """Element builder.

        Dispatches on the `builder` name of the matched `EmStrongItem`:
        'double2', 'double', or anything else for a single tag.
        """

        if builder == 'double2':
            return self.build_double2(m, tags, index)
        elif builder == 'double':
            return self.build_double(m, tags, index)
        else:
            return self.build_single(m, tags, index)

    def handleMatch(self, m, data):
        """Parse patterns.

        Tries each entry of `PATTERNS` at the start of match `m` and builds
        an element from the first one that matches. Returns
        `(None, None, None)` when none of them match.
        """

        el = None
        start = None
        end = None

        for index, item in enumerate(self.PATTERNS):
            m1 = item.pattern.match(data, m.start(0))
            if m1:
                start = m1.start(0)
                end = m1.end(0)
                el = self.build_element(m1, item.builder, item.tags, index)
                break
        return el, start, end
				582
				583
				584	class UnderscoreProcessor(AsteriskProcessor):
				585	"""Emphasis processor for handling strong and em matches inside underscores."""
				586
				587	PATTERNS = [
				588	EmStrongItem(re.compile(EM_STRONG2_RE, re.DOTALL \| re.UNICODE), 'double', 'strong,em'),
				589	EmStrongItem(re.compile(STRONG_EM2_RE, re.DOTALL \| re.UNICODE), 'double', 'em,strong'),
				590	EmStrongItem(re.compile(SMART_STRONG_EM_RE, re.DOTALL \| re.UNICODE), 'double2', 'strong,em'),
				591	EmStrongItem(re.compile(SMART_STRONG_RE, re.DOTALL \| re.UNICODE), 'single', 'strong'),
				592	EmStrongItem(re.compile(SMART_EMPHASIS_RE, re.DOTALL \| re.UNICODE), 'single', 'em')
				593	]
				594
				595
				596	class LinkInlineProcessor(InlineProcessor):
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	597	""" Return a link element from the given match. """
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	598	RE_LINK = re.compile(r'''$\s(?:(<[^<>]>)\s(?:('[^']'\|"[^"]")\s)?$)?''', re.DOTALL \| re.UNICODE)
				599	RE_TITLE_CLEAN = re.compile(r'\s')
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	600
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	601	def handleMatch(self, m, data):
				602	text, index, handled = self.getText(data, m.end(0))
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	603
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	604	if not handled:
				605	return None, None, None
				606
				607	href, title, index, handled = self.getLink(data, index)
				608	if not handled:
				609	return None, None, None
				610
				611	el = etree.Element("a")
				612	el.text = text
				613
				614	el.set("href", href)
				615
				616	if title is not None:
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	617	el.set("title", title)
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	618
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	619	return el, m.start(0), index
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	620
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	621	def getLink(self, data, index):
				622	"""Parse data between `()` of `[Text]()` allowing recursive `()`. """
				623
				624	href = ''
				625	title = None
				626	handled = False
				627
				628	m = self.RE_LINK.match(data, pos=index)
				629	if m and m.group(1):
				630	# Matches [Text](<link> "title")
				631	href = m.group(1)[1:-1].strip()
				632	if m.group(2):
				633	title = m.group(2)[1:-1]
				634	index = m.end(0)
				635	handled = True
				636	elif m:
				637	# Track bracket nesting and index in string
				638	bracket_count = 1
				639	backtrack_count = 1
				640	start_index = m.end()
				641	index = start_index
				642	last_bracket = -1
				643
				644	# Primary (first found) quote tracking.
				645	quote = None
				646	start_quote = -1
				647	exit_quote = -1
				648	ignore_matches = False
				649
				650	# Secondary (second found) quote tracking.
				651	alt_quote = None
				652	start_alt_quote = -1
				653	exit_alt_quote = -1
				654
				655	# Track last character
				656	last = ''
				657
				658	for pos in range(index, len(data)):
				659	c = data[pos]
				660	if c == '(':
				661	# Count nested (
				662	# Don't increment the bracket count if we are sure we're in a title.
				663	if not ignore_matches:
				664	bracket_count += 1
				665	elif backtrack_count > 0:
				666	backtrack_count -= 1
				667	elif c == ')':
				668	# Match nested ) to (
				669	# Don't decrement if we are sure we are in a title that is unclosed.
				670	if ((exit_quote != -1 and quote == last) or (exit_alt_quote != -1 and alt_quote == last)):
				671	bracket_count = 0
				672	elif not ignore_matches:
				673	bracket_count -= 1
				674	elif backtrack_count > 0:
				675	backtrack_count -= 1
				676	# We've found our backup end location if the title doesn't reslove.
				677	if backtrack_count == 0:
				678	last_bracket = index + 1
				679
				680	elif c in ("'", '"'):
				681	# Quote has started
				682	if not quote:
				683	# We'll assume we are now in a title.
				684	# Brackets are quoted, so no need to match them (except for the final one).
				685	ignore_matches = True
				686	backtrack_count = bracket_count
				687	bracket_count = 1
				688	start_quote = index + 1
				689	quote = c
				690	# Secondary quote (in case the first doesn't resolve): [text](link'"title")
				691	elif c != quote and not alt_quote:
				692	start_alt_quote = index + 1
				693	alt_quote = c
				694	# Update primary quote match
				695	elif c == quote:
				696	exit_quote = index + 1
				697	# Update secondary quote match
				698	elif alt_quote and c == alt_quote:
				699	exit_alt_quote = index + 1
				700
				701	index += 1
				702
				703	# Link is closed, so let's break out of the loop
				704	if bracket_count == 0:
				705	# Get the title if we closed a title string right before link closed
				706	if exit_quote >= 0 and quote == last:
				707	href = data[start_index:start_quote - 1]
				708	title = ''.join(data[start_quote:exit_quote - 1])
				709	elif exit_alt_quote >= 0 and alt_quote == last:
				710	href = data[start_index:start_alt_quote - 1]
				711	title = ''.join(data[start_alt_quote:exit_alt_quote - 1])
				712	else:
				713	href = data[start_index:index - 1]
				714	break
				715
				716	if c != ' ':
				717	last = c
				718
				719	# We have a scenario: [test](link"notitle)
				720	# When we enter a string, we stop tracking bracket resolution in the main counter,
				721	# but we do keep a backup counter up until we discover where we might resolve all brackets
				722	# if the title string fails to resolve.
				723	if bracket_count != 0 and backtrack_count == 0:
				724	href = data[start_index:last_bracket - 1]
				725	index = last_bracket
				726	bracket_count = 0
				727
				728	handled = bracket_count == 0
				729
				730	if title is not None:
				731	title = self.RE_TITLE_CLEAN.sub(' ', dequote(self.unescape(title.strip())))
				732
				733	href = self.unescape(href).strip()
				734
				735	return href, title, index, handled
				736
    def getText(self, data, index):
        """Collect the text between `[` and its matching `]`.

        Scanning starts at `index` (just past the opening bracket) and keeps
        track of nested square brackets so that only the bracket closing the
        outermost level ends the text.

        Returns a tuple `(text, index, handled)`: the collected text, the
        position just past the last character consumed, and whether the
        closing bracket was actually found.
        """
        depth = 1
        chars = []
        end = len(data)
        while index < end:
            ch = data[index]
            index += 1
            if ch == '[':
                depth += 1
            elif ch == ']':
                depth -= 1
                # Only a closing bracket can bring the depth back to zero.
                if depth == 0:
                    break
            chars.append(ch)
        return ''.join(chars), index, depth == 0
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	755
				756
class ImageInlineProcessor(LinkInlineProcessor):
    """ Build an `img` element from an inline image match. """

    def handleMatch(self, m, data):
        """Parse the alt text and link following the match and return an image.

        Returns `(element, start, end)` on success. If either the bracketed
        text or the link part cannot be parsed, `(None, None, None)` is
        returned instead.
        """
        alt_text, pos, ok = self.getText(data, m.end(0))
        if ok:
            # Only try the link part once the bracketed text resolved.
            src, title, pos, ok = self.getLink(data, pos)
        if not ok:
            return None, None, None

        image = etree.Element("img")
        image.set("src", src)
        if title is not None:
            image.set("title", title)
        image.set('alt', self.unescape(alt_text))

        return image, m.start(0), pos
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	778
				779
class ReferenceInlineProcessor(LinkInlineProcessor):
    """ Resolve a reference-style link against the stored references. """

    # Collapses any run of whitespace (including line breaks) in an id.
    NEWLINE_CLEANUP_RE = re.compile(r'\s+', re.MULTILINE)

    # Matches the `[id]` part, optionally preceded by one whitespace character.
    RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)

    def handleMatch(self, m, data):
        """Look up the reference for the match and build its element.

        Returns `(None, None, None)` when the text or the id cannot be
        parsed, `(None, start, end)` when the id is not a known reference,
        and `(element, start, end)` otherwise.
        """
        text, index, handled = self.getText(data, m.end(0))
        if not handled:
            return None, None, None

        ref_id, end, handled = self.evalId(data, index, text)
        if not handled:
            return None, None, None

        start = m.start(0)
        # Normalize whitespace runs in the id to single spaces before lookup.
        ref_id = self.NEWLINE_CLEANUP_RE.sub(' ', ref_id)
        references = self.md.references
        if ref_id not in references:
            # Undefined references are ignored.
            return None, start, end

        href, title = references[ref_id]
        return self.makeTag(href, title, text), start, end

    def evalId(self, data, index, text):
        """
        Evaluate the id portion of `[ref][id]`.

        An empty id (`[ref][]`) falls back to the lowercased `text`.
        Returns `(id, end, handled)`; when no `[id]` follows at `index`
        the result is `(None, index, False)`.
        """
        found = self.RE_LINK.match(data, pos=index)
        if found is None:
            return None, index, False
        ref_id = found.group(1).lower() or text.lower()
        return ref_id, found.end(0), True

    def makeTag(self, href, title, text):
        """Create an `a` element with the given href, optional title and text."""
        anchor = etree.Element('a')
        anchor.set('href', href)
        if title:
            anchor.set('title', title)
        anchor.text = text
        return anchor
				829
				830
class ShortReferenceInlineProcessor(ReferenceInlineProcessor):
    """ Short form of a reference link: `[google]`. """

    def evalId(self, data, index, text):
        """Use the lowercased link text itself as the id.

        Nothing further is consumed, so `index` is returned unchanged and
        the result is always marked as handled.
        """
        ref_id = text.lower()
        return ref_id, index, True
				837
				838
				839	class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	840	""" Match to a stored reference and return img element. """
				841	def makeTag(self, href, title, text):
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	842	el = etree.Element("img")
				843	el.set("src", href)
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	844	if title:
				845	el.set("title", title)
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	846	el.set("alt", self.unescape(text))
				847	return el
				848
				849
class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor):
    """ Short form of an image reference: `![ref]`. """

    def evalId(self, data, index, text):
        """Use the lowercased image text itself as the id.

        Nothing further is consumed, so `index` is returned unchanged and
        the result is always marked as handled.
        """
        ref_id = text.lower()
        return ref_id, index, True
				856
				857
				858	class AutolinkInlineProcessor(InlineProcessor):
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	859	""" Return a link Element given an autolink (`<http://example/com>`). """
Yu-Ping Wu	6a8f3a2	2021-11-24 00:45:03 +0000	[diff] [blame^]	860	def handleMatch(self, m, data):
				861	el = etree.Element("a")
				862	el.set('href', self.unescape(m.group(1)))
				863	el.text = util.AtomicString(m.group(1))
				864	return el, m.start(0), m.end(0)
dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame]	865
				866
class AutomailInlineProcessor(InlineProcessor):
    """
    Build a mailto link element from an automail link (`<foo@example.com>`).
    """

    def handleMatch(self, m, data):
        """Return an `a` element for the matched address.

        Every character of the link text is written as a named entity where
        one exists for its code point and as a numeric reference otherwise;
        every character of the href (including the `mailto:` prefix) is
        written as a numeric reference. Both use `util.AMP_SUBSTITUTE` in
        place of a literal ampersand.
        """

        def codepoint2name(code):
            """Return entity definition by code, or the code if not defined."""
            entity = entities.codepoint2name.get(code)
            if not entity:
                return "%s#%d;" % (util.AMP_SUBSTITUTE, code)
            return "{}{};".format(util.AMP_SUBSTITUTE, entity)

        anchor = etree.Element('a')

        address = self.unescape(m.group(1))
        # Drop a leading scheme so it is not duplicated in the href below.
        if address.startswith("mailto:"):
            address = address[len("mailto:"):]

        anchor.text = util.AtomicString(
            ''.join(codepoint2name(ord(ch)) for ch in address)
        )

        target = "mailto:" + address
        encoded = "".join(
            util.AMP_SUBSTITUTE + '#%d;' % ord(ch) for ch in target
        )
        anchor.set('href', encoded)

        return anchor, m.start(0), m.end(0)