You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
99 lines
3.1 KiB
99 lines
3.1 KiB
'''
Abbreviation Extension for Python-Markdown
==========================================

This extension adds abbreviation handling to Python-Markdown.

See <https://Python-Markdown.github.io/extensions/abbreviations>
for documentation.

Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/) and
 [Seemant Kulleen](http://www.kulleen.org/)

All changes Copyright 2008-2014 The Python Markdown Project

License: [BSD](https://opensource.org/licenses/bsd-license.php)

'''
|
|
|
from . import Extension |
|
from ..blockprocessors import BlockProcessor |
|
from ..inlinepatterns import InlineProcessor |
|
from ..util import AtomicString |
|
import re |
|
import xml.etree.ElementTree as etree |
|
|
|
|
|
class AbbrExtension(Extension):
    """ Python-Markdown extension that turns on abbreviation definitions. """

    def extendMarkdown(self, md):
        """ Register an `AbbrPreprocessor` with the block parser of `md`.

        The processor is added to `md.parser.blockprocessors` under the
        name `abbr` with priority 16.
        """
        block_parser = md.parser
        abbr_processor = AbbrPreprocessor(block_parser)
        block_parser.blockprocessors.register(abbr_processor, 'abbr', 16)
|
|
|
|
|
class AbbrPreprocessor(BlockProcessor):
    """ Abbreviation Preprocessor - parse text for abbr references. """

    # Matches a reference of the form `*[abbr]: title` at the start of a line.
    # A single space is allowed before the colon, and the title may sit on the
    # line following the colon.
    RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

    def test(self, parent, block):
        """ Accept every block; `run` decides whether it holds a reference. """
        return True

    def run(self, parent, blocks):
        '''
        Find and remove the first abbreviation reference in the next block.

        The reference is registered as a new `AbbrInlineProcessor` on the
        markdown instance under the name `abbr-<abbr>`. Any text before or
        after the reference is pushed back onto `blocks` as separate blocks.

        Returns `True` when a reference was consumed. Returns `False` (with
        the block restored to `blocks`) when the block holds no usable
        reference.

        '''
        block = blocks.pop(0)
        m = self.RE.search(block)
        abbr = m.group('abbr').strip() if m else ''
        if not abbr:
            # Either nothing matched or the abbreviation is empty. An empty
            # abbreviation would produce a zero-width inline pattern that
            # matches at every word boundary, so it is treated as "no match".
            blocks.insert(0, block)
            return False

        title = m.group('title').strip()
        self.parser.md.inlinePatterns.register(
            AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
        )
        before, after = block[:m.start()], block[m.end():]
        if after.strip():
            # Add any content after match back to blocks as separate block
            blocks.insert(0, after.lstrip('\n'))
        if before.strip():
            # Add any content before match back to blocks as separate block.
            # Inserted last so it ends up ahead of the trailing content.
            blocks.insert(0, before.rstrip('\n'))
        return True

    def _generate_pattern(self, text):
        '''
        Given a string, returns a regex pattern to match that string.

        'HTML' -> r'(?P<abbr>\\b[H][T][M][L]\\b)'

        Note: we force each char as a literal match (in brackets) as we don't
        know what they will be beforehand. Each char is also escaped so that
        characters which are special inside a character class (such as `^`
        or a backslash) cannot corrupt the generated pattern.

        '''
        literal_chars = ''.join(r'[%s]' % re.escape(char) for char in text)
        return r'(?P<abbr>\b%s\b)' % literal_chars
|
|
|
|
|
class AbbrInlineProcessor(InlineProcessor):
    """ Inline processor that renders one abbreviation as an `<abbr>` element. """

    def __init__(self, pattern, title):
        """ Store the abbreviation's `title` alongside the match `pattern`. """
        super().__init__(pattern)
        self.title = title

    def handleMatch(self, m, data):
        """ Build the `<abbr>` element for match `m`.

        Returns the element together with the start and end offsets of the
        whole match. The element text is wrapped in `AtomicString`.
        """
        element = etree.Element('abbr', {'title': self.title})
        element.text = AtomicString(m.group('abbr'))
        start, end = m.span(0)
        return element, start, end
|
|
|
|
|
def makeExtension(**kwargs):  # pragma: no cover
    """ Create an `AbbrExtension`, forwarding all keyword arguments to it. """
    extension = AbbrExtension(**kwargs)
    return extension
|
|
|