Blame - markdown/util.py - chromium.googlesource.com/chromium/src/third_party/Python-Markdown

blob: d3d48f099940521431520bbc42eb466fd099e1fb [file] [log] [blame]

dpranke	b08af21	2015-10-06 17:44:36 -0700	[diff] [blame^]	1	# -- coding: utf-8 --
				2	from __future__ import unicode_literals
				3	import re
				4	import sys
				5
				6
				7	"""
				8	Python 3 Stuff
				9	=============================================================================
				10	"""
				11	PY3 = sys.version_info[0] == 3
				12
				13	if PY3: # pragma: no cover
				14	string_type = str
				15	text_type = str
				16	int2str = chr
				17	else: # pragma: no cover
				18	string_type = basestring # noqa
				19	text_type = unicode # noqa
				20	int2str = unichr # noqa
				21
				22
				23	"""
				24	Constants you might want to modify
				25	-----------------------------------------------------------------------------
				26	"""
				27
				28
				29	BLOCK_LEVEL_ELEMENTS = re.compile(
				30	"^(p\|div\|h[1-6]\|blockquote\|pre\|table\|dl\|ol\|ul"
				31	"\|script\|noscript\|form\|fieldset\|iframe\|math"
				32	"\|hr\|hr/\|style\|li\|dt\|dd\|thead\|tbody"
				33	"\|tr\|th\|td\|section\|footer\|header\|group\|figure"
				34	"\|figcaption\|aside\|article\|canvas\|output"
				35	"\|progress\|video\|nav)$",
				36	re.IGNORECASE
				37	)
				38	# Placeholders
				39	STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder
				40	ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder
				41	INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
				42	INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
				43	INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
				44	AMP_SUBSTITUTE = STX+"amp"+ETX
				45	HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
				46	HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
				47	TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX
				48
				49
				50	"""
				51	Constants you probably do not need to change
				52	-----------------------------------------------------------------------------
				53	"""
				54
				55	RTL_BIDI_RANGES = (
				56	('\u0590', '\u07FF'),
				57	# Hebrew (0590-05FF), Arabic (0600-06FF),
				58	# Syriac (0700-074F), Arabic supplement (0750-077F),
				59	# Thaana (0780-07BF), Nko (07C0-07FF).
				60	('\u2D30', '\u2D7F') # Tifinagh
				61	)
				62
				63	# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
				64	# markdown.util import etree`). Do not import it by yourself.
				65
				66	try: # pragma: no cover
				67	# Is the C implementation of ElementTree available?
				68	import xml.etree.cElementTree as etree
				69	from xml.etree.ElementTree import Comment
				70	# Serializers (including ours) test with non-c Comment
				71	etree.test_comment = Comment
				72	if etree.VERSION < "1.0.5":
				73	raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
				74	except (ImportError, RuntimeError): # pragma: no cover
				75	# Use the Python implementation of ElementTree?
				76	import xml.etree.ElementTree as etree
				77	if etree.VERSION < "1.1":
				78	raise RuntimeError("ElementTree version 1.1 or higher is required")
				79
				80
				81	"""
				82	AUXILIARY GLOBAL FUNCTIONS
				83	=============================================================================
				84	"""
				85
				86
				87	def isBlockLevel(tag):
				88	"""Check if the tag is a block level HTML tag."""
				89	if isinstance(tag, string_type):
				90	return BLOCK_LEVEL_ELEMENTS.match(tag)
				91	# Some ElementTree tags are not strings, so return False.
				92	return False
				93
				94
				95	def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
				96	"""Parses a string representing bool value. If parsing was successful,
				97	returns True or False. If preserve_none=True, returns True, False,
				98	or None. If parsing was not successful, raises ValueError, or, if
				99	fail_on_errors=False, returns None."""
				100	if not isinstance(value, string_type):
				101	if preserve_none and value is None:
				102	return value
				103	return bool(value)
				104	elif preserve_none and value.lower() == 'none':
				105	return None
				106	elif value.lower() in ('true', 'yes', 'y', 'on', '1'):
				107	return True
				108	elif value.lower() in ('false', 'no', 'n', 'off', '0', 'none'):
				109	return False
				110	elif fail_on_errors:
				111	raise ValueError('Cannot parse bool value: %r' % value)
				112
				113
				114	"""
				115	MISC AUXILIARY CLASSES
				116	=============================================================================
				117	"""
				118
				119
				120	class AtomicString(text_type):
				121	"""A string which should not be further processed."""
				122	pass
				123
				124
				125	class Processor(object):
				126	def __init__(self, markdown_instance=None):
				127	if markdown_instance:
				128	self.markdown = markdown_instance
				129
				130
				131	class HtmlStash(object):
				132	"""
				133	This class is used for stashing HTML objects that we extract
				134	in the beginning and replace with place-holders.
				135	"""
				136
				137	def __init__(self):
				138	""" Create a HtmlStash. """
				139	self.html_counter = 0 # for counting inline html segments
				140	self.rawHtmlBlocks = []
				141	self.tag_counter = 0
				142	self.tag_data = [] # list of dictionaries in the order tags appear
				143
				144	def store(self, html, safe=False):
				145	"""
				146	Saves an HTML segment for later reinsertion. Returns a
				147	placeholder string that needs to be inserted into the
				148	document.
				149
				150	Keyword arguments:
				151
				152	* html: an html segment
				153	* safe: label an html segment as safe for safemode
				154
				155	Returns : a placeholder string
				156
				157	"""
				158	self.rawHtmlBlocks.append((html, safe))
				159	placeholder = self.get_placeholder(self.html_counter)
				160	self.html_counter += 1
				161	return placeholder
				162
				163	def reset(self):
				164	self.html_counter = 0
				165	self.rawHtmlBlocks = []
				166
				167	def get_placeholder(self, key):
				168	return HTML_PLACEHOLDER % key
				169
				170	def store_tag(self, tag, attrs, left_index, right_index):
				171	"""Store tag data and return a placeholder."""
				172	self.tag_data.append({'tag': tag, 'attrs': attrs,
				173	'left_index': left_index,
				174	'right_index': right_index})
				175	placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
				176	self.tag_counter += 1 # equal to the tag's index in self.tag_data
				177	return placeholder