Blame - markdown/serializers.py - chromium.googlesource.com/chromium/src/third_party/Python-Markdown

blob: 1e8d9dd288f00ea0d5e001a22eb5004001cbcbf5 [file] [log] [blame]

dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame^]	1	# markdown/serializers.py
				2	#
				3	# Add x/html serialization to ElementTree
				4	# Taken from ElementTree 1.3 preview with slight modifications
				5	#
				6	# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
				7	#
				8	# fredrik@pythonware.com
				9	# http://www.pythonware.com
				10	#
				11	# --------------------------------------------------------------------
				12	# The ElementTree toolkit is
				13	#
				14	# Copyright (c) 1999-2007 by Fredrik Lundh
				15	#
				16	# By obtaining, using, and/or copying this software and/or its
				17	# associated documentation, you agree that you have read, understood,
				18	# and will comply with the following terms and conditions:
				19	#
				20	# Permission to use, copy, modify, and distribute this software and
				21	# its associated documentation for any purpose and without fee is
				22	# hereby granted, provided that the above copyright notice appears in
				23	# all copies, and that both that copyright notice and this permission
				24	# notice appear in supporting documentation, and that the name of
				25	# Secret Labs AB or the author not be used in advertising or publicity
				26	# pertaining to distribution of the software without specific, written
				27	# prior permission.
				28	#
				29	# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
				30	# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
				31	# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
				32	# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
				33	# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
				34	# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
				35	# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
				36	# OF THIS SOFTWARE.
				37	# --------------------------------------------------------------------
				38
				39
				40	from __future__ import absolute_import
				41	from __future__ import unicode_literals
				42	from . import util
				43	ElementTree = util.etree.ElementTree
				44	QName = util.etree.QName
				45	if hasattr(util.etree, 'test_comment'): # pragma: no cover
				46	Comment = util.etree.test_comment
				47	else: # pragma: no cover
				48	Comment = util.etree.Comment
				49	PI = util.etree.PI
				50	ProcessingInstruction = util.etree.ProcessingInstruction
				51
				52	__all__ = ['to_html_string', 'to_xhtml_string']
				53
				54	HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
				55	"img", "input", "isindex", "link", "meta" "param")
				56
				57	try:
				58	HTML_EMPTY = set(HTML_EMPTY)
				59	except NameError: # pragma: no cover
				60	pass
				61
				62	_namespace_map = {
				63	# "well-known" namespace prefixes
				64	"http://www.w3.org/XML/1998/namespace": "xml",
				65	"http://www.w3.org/1999/xhtml": "html",
				66	"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
				67	"http://schemas.xmlsoap.org/wsdl/": "wsdl",
				68	# xml schema
				69	"http://www.w3.org/2001/XMLSchema": "xs",
				70	"http://www.w3.org/2001/XMLSchema-instance": "xsi",
				71	# dublic core
				72	"http://purl.org/dc/elements/1.1/": "dc",
				73	}
				74
				75
				76	def _raise_serialization_error(text): # pragma: no cover
				77	raise TypeError(
				78	"cannot serialize %r (type %s)" % (text, type(text).__name__)
				79	)
				80
				81
				82	def _encode(text, encoding):
				83	try:
				84	return text.encode(encoding, "xmlcharrefreplace")
				85	except (TypeError, AttributeError): # pragma: no cover
				86	_raise_serialization_error(text)
				87
				88
				89	def _escape_cdata(text):
				90	# escape character data
				91	try:
				92	# it's worth avoiding do-nothing calls for strings that are
				93	# shorter than 500 character, or so. assume that's, by far,
				94	# the most common case in most applications.
				95	if "&" in text:
				96	text = text.replace("&", "&")
				97	if "<" in text:
				98	text = text.replace("<", "<")
				99	if ">" in text:
				100	text = text.replace(">", ">")
				101	return text
				102	except (TypeError, AttributeError): # pragma: no cover
				103	_raise_serialization_error(text)
				104
				105
				106	def _escape_attrib(text):
				107	# escape attribute value
				108	try:
				109	if "&" in text:
				110	text = text.replace("&", "&")
				111	if "<" in text:
				112	text = text.replace("<", "<")
				113	if ">" in text:
				114	text = text.replace(">", ">")
				115	if "\"" in text:
				116	text = text.replace("\"", """)
				117	if "\n" in text:
				118	text = text.replace("\n", " ")
				119	return text
				120	except (TypeError, AttributeError): # pragma: no cover
				121	_raise_serialization_error(text)
				122
				123
				124	def _escape_attrib_html(text):
				125	# escape attribute value
				126	try:
				127	if "&" in text:
				128	text = text.replace("&", "&")
				129	if "<" in text:
				130	text = text.replace("<", "<")
				131	if ">" in text:
				132	text = text.replace(">", ">")
				133	if "\"" in text:
				134	text = text.replace("\"", """)
				135	return text
				136	except (TypeError, AttributeError): # pragma: no cover
				137	_raise_serialization_error(text)
				138
				139
				140	def _serialize_html(write, elem, qnames, namespaces, format):
				141	tag = elem.tag
				142	text = elem.text
				143	if tag is Comment:
				144	write("<!--%s-->" % _escape_cdata(text))
				145	elif tag is ProcessingInstruction:
				146	write("<?%s?>" % _escape_cdata(text))
				147	else:
				148	tag = qnames[tag]
				149	if tag is None:
				150	if text:
				151	write(_escape_cdata(text))
				152	for e in elem:
				153	_serialize_html(write, e, qnames, None, format)
				154	else:
				155	write("<" + tag)
				156	items = elem.items()
				157	if items or namespaces:
				158	items = sorted(items) # lexical order
				159	for k, v in items:
				160	if isinstance(k, QName):
				161	k = k.text
				162	if isinstance(v, QName):
				163	v = qnames[v.text]
				164	else:
				165	v = _escape_attrib_html(v)
				166	if qnames[k] == v and format == 'html':
				167	# handle boolean attributes
				168	write(" %s" % v)
				169	else:
				170	write(" %s=\"%s\"" % (qnames[k], v))
				171	if namespaces:
				172	items = namespaces.items()
				173	items.sort(key=lambda x: x[1]) # sort on prefix
				174	for v, k in items:
				175	if k:
				176	k = ":" + k
				177	write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
				178	if format == "xhtml" and tag.lower() in HTML_EMPTY:
				179	write(" />")
				180	else:
				181	write(">")
				182	if text:
				183	if tag.lower() in ["script", "style"]:
				184	write(text)
				185	else:
				186	write(_escape_cdata(text))
				187	for e in elem:
				188	_serialize_html(write, e, qnames, None, format)
				189	if tag.lower() not in HTML_EMPTY:
				190	write("</" + tag + ">")
				191	if elem.tail:
				192	write(_escape_cdata(elem.tail))
				193
				194
				195	def _write_html(root,
				196	encoding=None,
				197	default_namespace=None,
				198	format="html"):
				199	assert root is not None
				200	data = []
				201	write = data.append
				202	qnames, namespaces = _namespaces(root, default_namespace)
				203	_serialize_html(write, root, qnames, namespaces, format)
				204	if encoding is None:
				205	return "".join(data)
				206	else:
				207	return _encode("".join(data))
				208
				209
				210	# --------------------------------------------------------------------
				211	# serialization support
				212
				213	def _namespaces(elem, default_namespace=None):
				214	# identify namespaces used in this tree
				215
				216	# maps qnames to encoded prefix:local names
				217	qnames = {None: None}
				218
				219	# maps uri:s to prefixes
				220	namespaces = {}
				221	if default_namespace:
				222	namespaces[default_namespace] = ""
				223
				224	def add_qname(qname):
				225	# calculate serialized qname representation
				226	try:
				227	if qname[:1] == "{":
				228	uri, tag = qname[1:].split("}", 1)
				229	prefix = namespaces.get(uri)
				230	if prefix is None:
				231	prefix = _namespace_map.get(uri)
				232	if prefix is None:
				233	prefix = "ns%d" % len(namespaces)
				234	if prefix != "xml":
				235	namespaces[uri] = prefix
				236	if prefix:
				237	qnames[qname] = "%s:%s" % (prefix, tag)
				238	else:
				239	qnames[qname] = tag # default element
				240	else:
				241	if default_namespace:
				242	raise ValueError(
				243	"cannot use non-qualified names with "
				244	"default_namespace option"
				245	)
				246	qnames[qname] = qname
				247	except TypeError: # pragma: no cover
				248	_raise_serialization_error(qname)
				249
				250	# populate qname and namespaces table
				251	try:
				252	iterate = elem.iter
				253	except AttributeError:
				254	iterate = elem.getiterator # cET compatibility
				255	for elem in iterate():
				256	tag = elem.tag
				257	if isinstance(tag, QName) and tag.text not in qnames:
				258	add_qname(tag.text)
				259	elif isinstance(tag, util.string_type):
				260	if tag not in qnames:
				261	add_qname(tag)
				262	elif tag is not None and tag is not Comment and tag is not PI:
				263	_raise_serialization_error(tag)
				264	for key, value in elem.items():
				265	if isinstance(key, QName):
				266	key = key.text
				267	if key not in qnames:
				268	add_qname(key)
				269	if isinstance(value, QName) and value.text not in qnames:
				270	add_qname(value.text)
				271	text = elem.text
				272	if isinstance(text, QName) and text.text not in qnames:
				273	add_qname(text.text)
				274	return qnames, namespaces
				275
				276
				277	def to_html_string(element):
				278	return _write_html(ElementTree(element).getroot(), format="html")
				279
				280
				281	def to_xhtml_string(element):
				282	return _write_html(ElementTree(element).getroot(), format="xhtml")