You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
257 lines
9.8 KiB
257 lines
9.8 KiB
''' |
|
Smarty extension for Python-Markdown |
|
==================================== |
|
|
|
Adds conversion of ASCII dashes, quotes and ellipses to their HTML |
|
entity equivalents. |
|
|
|
See <https://Python-Markdown.github.io/extensions/smarty> |
|
for documentation. |
|
|
|
Author: 2013, Dmitry Shachnev <mitya57@gmail.com> |
|
|
|
All changes Copyright 2013-2014 The Python Markdown Project |
|
|
|
License: [BSD](https://opensource.org/licenses/bsd-license.php) |
|
|
|
SmartyPants license: |
|
|
|
Copyright (c) 2003 John Gruber <https://daringfireball.net/> |
|
All rights reserved. |
|
|
|
Redistribution and use in source and binary forms, with or without |
|
modification, are permitted provided that the following conditions are |
|
met: |
|
|
|
* Redistributions of source code must retain the above copyright |
|
notice, this list of conditions and the following disclaimer. |
|
|
|
* Redistributions in binary form must reproduce the above copyright |
|
notice, this list of conditions and the following disclaimer in |
|
the documentation and/or other materials provided with the |
|
distribution. |
|
|
|
* Neither the name "SmartyPants" nor the names of its contributors |
|
may be used to endorse or promote products derived from this |
|
software without specific prior written permission. |
|
|
|
This software is provided by the copyright holders and contributors "as |
|
is" and any express or implied warranties, including, but not limited |
|
to, the implied warranties of merchantability and fitness for a |
|
particular purpose are disclaimed. In no event shall the copyright |
|
owner or contributors be liable for any direct, indirect, incidental, |
|
special, exemplary, or consequential damages (including, but not |
|
limited to, procurement of substitute goods or services; loss of use, |
|
data, or profits; or business interruption) however caused and on any |
|
theory of liability, whether in contract, strict liability, or tort |
|
(including negligence or otherwise) arising in any way out of the use |
|
of this software, even if advised of the possibility of such damage. |
|
|
|
|
|
`smartypants.py` license: |
|
|
|
`smartypants.py` is a derivative work of SmartyPants. |
|
Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/> |
|
|
|
Redistribution and use in source and binary forms, with or without |
|
modification, are permitted provided that the following conditions are |
|
met: |
|
|
|
* Redistributions of source code must retain the above copyright |
|
notice, this list of conditions and the following disclaimer. |
|
|
|
* Redistributions in binary form must reproduce the above copyright |
|
notice, this list of conditions and the following disclaimer in |
|
the documentation and/or other materials provided with the |
|
distribution. |
|
|
|
This software is provided by the copyright holders and contributors "as |
|
is" and any express or implied warranties, including, but not limited |
|
to, the implied warranties of merchantability and fitness for a |
|
particular purpose are disclaimed. In no event shall the copyright |
|
owner or contributors be liable for any direct, indirect, incidental, |
|
special, exemplary, or consequential damages (including, but not |
|
limited to, procurement of substitute goods or services; loss of use, |
|
data, or profits; or business interruption) however caused and on any |
|
theory of liability, whether in contract, strict liability, or tort |
|
(including negligence or otherwise) arising in any way out of the use |
|
of this software, even if advised of the possibility of such damage. |
|
|
|
''' |
|
|
|
|
|
from . import Extension |
|
from ..inlinepatterns import HtmlInlineProcessor, HTML_RE |
|
from ..treeprocessors import InlineProcessor |
|
from ..util import Registry |
|
|
|
|
|
# Constants for quote education.
#
# NOTE: several literals below are HTML *entities* spelled out in ASCII
# (`&nbsp;`, `&#8211;`, `&mdash;`, ...). They must stay in entity form: the
# patterns are meant to match entity text, and the substitutions are the
# entity text that gets emitted. Do not let a tool decode them into the raw
# characters.

# Character class of ASCII punctuation characters.
punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
# Characters that may follow the end of a word.
endOfWordClass = r"[\s.,;:!?)]"
# A closing quote may follow any character that is NOT in this negated set:
# whitespace, opening brackets, a hyphen, or the U+0002 / U+0003 control
# characters (presumably Markdown's internal placeholder delimiters --
# TODO confirm against `util`).
closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]"

# Capturing group of everything that may directly precede an opening quote.
# The regexes built from it keep this text via group 1.
openingQuotesBase = (
    r'(\s'               # a  whitespace char
    r'|&nbsp;'           # or a non-breaking space entity
    r'|--'               # or dashes
    r'|–|—'              # or Unicode
    r'|&[mn]dash;'       # or named dash entities
    r'|&#8211;|&#8212;'  # or decimal entities
    r')'
)

# Default replacement text for each kind of substitution, as HTML entities.
# `SmartyExtension` copies this mapping and lets the `substitutions` config
# option override individual keys.
substitutions = {
    'mdash': '&mdash;',
    'ndash': '&ndash;',
    'ellipsis': '&hellip;',
    'left-angle-quote': '&laquo;',
    'right-angle-quote': '&raquo;',
    'left-single-quote': '&lsquo;',
    'right-single-quote': '&rsquo;',
    'left-double-quote': '&ldquo;',
    'right-double-quote': '&rdquo;',
}


# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
singleQuoteStartRe = r"^'(?=%s\B)" % punctClass
doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass

# Special case for double sets of quotes, e.g.:
#   <p>He said, "'Quoted' words in a larger quote."</p>
doubleQuoteSetsRe = r""""'(?=\w)"""
singleQuoteSetsRe = r"""'"(?=\w)"""

# Special case for decade abbreviations (the '80s):
decadeAbbrRe = r"(?<!\w)'(?=\d{2}s)"

# Get most opening double quotes:
openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase

# Double closing quotes:
closingDoubleQuotesRegex = r'"(?=\s)'
closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass

# Get most opening single quotes:
openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase

# Single closing quotes:
closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass
closingSingleQuotesRegex2 = r"'(\s|s\b)"

# All remaining quotes should be opening ones
remainingSingleQuotesRegex = r"'"
remainingDoubleQuotesRegex = r'"'
|
|
|
# Stricter variant of the imported `HTML_RE`: the same pattern with a
# negative lookahead appended, so a match is rejected when it would be
# immediately followed by another `>`.
HTML_STRICT_RE = (
    HTML_RE
    + r'(?!\>)'
)
|
|
|
|
|
class SubstituteTextPattern(HtmlInlineProcessor):
    """Inline processor that replaces each match with prepared text.

    `replace` is an iterable of parts used to build the replacement:

    * an `int` part is replaced by the text of that match group;
    * any other part is passed to `md.htmlStash.store()` and whatever that
      call returns is used in its place.
    """

    def __init__(self, pattern, replace, md):
        """Remember the replacement parts and the Markdown instance.

        Only `pattern` is forwarded to the `HtmlInlineProcessor` initialiser;
        `md` is assigned on the instance afterwards.
        """
        HtmlInlineProcessor.__init__(self, pattern)
        self.md = md
        self.replace = replace

    def handleMatch(self, m, data):
        """Build the replacement for match `m`.

        Returns a `(text, start, end)` tuple where `start`/`end` are the
        bounds of the whole match. `data` is not used.
        """
        pieces = []
        for item in self.replace:
            if not isinstance(item, int):
                # Literal replacement text: stash it and emit what the
                # stash hands back.
                pieces.append(self.md.htmlStash.store(item))
            else:
                # Group reference: copy the matched text through unchanged.
                pieces.append(m.group(item))
        return ''.join(pieces), m.start(0), m.end(0)
|
|
|
|
|
class SmartyExtension(Extension):
    """Extension that registers the "smart" typography inline patterns.

    Config options (see `self.config`): `smart_quotes`,
    `smart_angled_quotes`, `smart_dashes`, `smart_ellipses` switch the
    individual pattern families on or off; `substitutions` overrides entries
    of the module-level default `substitutions` mapping.
    """

    def __init__(self, **kwargs):
        """Declare the config options and resolve the substitution table."""
        self.config = {
            'smart_quotes': [True, 'Educate quotes'],
            'smart_angled_quotes': [False, 'Educate angled quotes'],
            'smart_dashes': [True, 'Educate dashes'],
            'smart_ellipses': [True, 'Educate ellipses'],
            'substitutions': [{}, 'Overwrite default substitutions'],
        }
        super().__init__(**kwargs)
        # Start from a copy of the module defaults so the shared mapping is
        # never mutated, then layer the user-supplied overrides on top.
        overrides = self.getConfig('substitutions', default={})
        self.substitutions = dict(substitutions)
        self.substitutions.update(overrides)

    def _addPatterns(self, md, patterns, serie, priority):
        """Register a series of `(regex, replace)` pairs.

        Entry number `i` is registered under the name `smarty-<serie>-<i>`
        with priority `priority - i`, i.e. each entry gets a priority one
        lower than the entry before it.
        """
        for offset, (regex, replacement) in enumerate(patterns):
            processor = SubstituteTextPattern(regex, replacement, md)
            label = 'smarty-%s-%d' % (serie, offset)
            self.inlinePatterns.register(processor, label, priority - offset)

    def educateDashes(self, md):
        """Register the em-dash (`---`) and en-dash (`--`) patterns.

        Both regexes refuse to match when the run of hyphens is longer than
        the exact length they look for.
        """
        registrations = (
            (
                'smarty-em-dashes', 50,
                SubstituteTextPattern(
                    r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md
                ),
            ),
            (
                'smarty-en-dashes', 45,
                SubstituteTextPattern(
                    r'(?<!-)--(?!-)', (self.substitutions['ndash'],), md
                ),
            ),
        )
        for label, priority, processor in registrations:
            self.inlinePatterns.register(processor, label, priority)

    def educateEllipses(self, md):
        """Register the pattern turning exactly three dots into an ellipsis."""
        replacement = (self.substitutions['ellipsis'],)
        processor = SubstituteTextPattern(r'(?<!\.)\.{3}(?!\.)', replacement, md)
        self.inlinePatterns.register(processor, 'smarty-ellipses', 10)

    def educateAngledQuotes(self, md):
        """Register the `<<` and `>>` angled-quote patterns."""
        registrations = (
            (
                'smarty-left-angle-quotes', 40,
                SubstituteTextPattern(
                    r'\<\<', (self.substitutions['left-angle-quote'],), md
                ),
            ),
            (
                'smarty-right-angle-quotes', 35,
                SubstituteTextPattern(
                    r'\>\>', (self.substitutions['right-angle-quote'],), md
                ),
            ),
        )
        for label, priority, processor in registrations:
            self.inlinePatterns.register(processor, label, priority)

    def educateQuotes(self, md):
        """Register the single- and double-quote patterns as one series.

        The position of each entry matters: `_addPatterns` derives both the
        registered name and the priority from it.
        """
        subs = self.substitutions
        left_single = subs['left-single-quote']
        right_single = subs['right-single-quote']
        left_double = subs['left-double-quote']
        right_double = subs['right-double-quote']
        quote_patterns = (
            (singleQuoteStartRe, (right_single,)),
            (doubleQuoteStartRe, (right_double,)),
            (doubleQuoteSetsRe, (left_double + left_single,)),
            (singleQuoteSetsRe, (left_single + left_double,)),
            (decadeAbbrRe, (right_single,)),
            # Group 1 is the text preceding the quote; it is passed through.
            (openingSingleQuotesRegex, (1, left_single)),
            (closingSingleQuotesRegex, (right_single,)),
            # Group 1 is the text following the quote; it is passed through.
            (closingSingleQuotesRegex2, (right_single, 1)),
            (remainingSingleQuotesRegex, (left_single,)),
            (openingDoubleQuotesRegex, (1, left_double)),
            (closingDoubleQuotesRegex, (right_double,)),
            (closingDoubleQuotesRegex2, (right_double,)),
            (remainingDoubleQuotesRegex, (left_double,)),
        )
        self._addPatterns(md, quote_patterns, 'quotes', 30)

    def extendMarkdown(self, md):
        """Install the enabled pattern families on the Markdown instance.

        The patterns are collected in a private `Registry` and run by a
        dedicated `InlineProcessor` registered as the `smarty` tree
        processor, rather than being added to `md.inlinePatterns`.
        """
        enabled = self.getConfigs()
        self.inlinePatterns = Registry()

        if enabled['smart_ellipses']:
            self.educateEllipses(md)
        if enabled['smart_quotes']:
            self.educateQuotes(md)
        if enabled['smart_angled_quotes']:
            self.educateAngledQuotes(md)
            # Override `HTML_RE` from `inlinepatterns.py` so that it does not
            # process tags with duplicate closing quotes.
            strict_html = HtmlInlineProcessor(HTML_STRICT_RE, md)
            md.inlinePatterns.register(strict_html, 'html', 90)
        if enabled['smart_dashes']:
            self.educateDashes(md)

        smarty_processor = InlineProcessor(md)
        smarty_processor.inlinePatterns = self.inlinePatterns
        md.treeprocessors.register(smarty_processor, 'smarty', 2)
        # Add both quote characters to the instance's escapable characters.
        md.ESCAPED_CHARS.extend(['"', "'"])
|
|
|
|
|
def makeExtension(**kwargs):  # pragma: no cover
    """Create a `SmartyExtension`, forwarding all keyword arguments to it."""
    extension = SmartyExtension(**kwargs)
    return extension
|
|
|