This commit is contained in:
Waylon Walker 2022-03-31 20:20:07 -05:00
commit 38355d2442
No known key found for this signature in database
GPG key ID: 66E2BF2B4190EFE4
9083 changed files with 1225834 additions and 0 deletions

View file

@ -0,0 +1,228 @@
from __future__ import unicode_literals
import re
from builtins import str
from commonmark.common import escape_xml
from commonmark.render.renderer import Renderer
# URL schemes that are filtered from link/image destinations in "safe" mode.
# NOTE(review): "^" binds only to the first alternative, so "vbscript:",
# "file:" and "data:" are matched anywhere in the URL, not only at the start.
# The pattern is deliberately left unchanged here because tightening it would
# alter which URLs get filtered -- confirm the intended behaviour first.
reUnsafeProtocol = re.compile(
    r'^javascript:|vbscript:|file:|data:', re.IGNORECASE)
# "data:" URLs that start with one of these image media types are allowed.
reSafeDataProtocol = re.compile(
    r'^data:image\/(?:png|gif|jpeg|webp)', re.IGNORECASE)


def potentially_unsafe(url):
    """Return True if ``url`` should be dropped when rendering in safe mode.

    A URL is considered unsafe when it matches ``reUnsafeProtocol`` and is
    not one of the whitelisted ``data:image/...`` forms matched by
    ``reSafeDataProtocol``.

    :param url: the link or image destination to check (a string).
    :returns: ``True`` or ``False``; always a real ``bool`` so callers can
        rely on a single return type instead of a mix of ``None``, match
        objects and booleans.
    """
    if reUnsafeProtocol.search(url) is None:
        return False
    return reSafeDataProtocol.search(url) is None
class HtmlRenderer(Renderer):
    """Render an AST to an HTML string.

    Every node type is handled by the method of the same name; each handler
    receives the node and an ``entering`` flag and appends to ``self.buf``.

    Options read from ``self.options``:

    ``softbreak``
        String emitted for a soft line break. Defaults to a newline.
    ``safe``
        When truthy, raw HTML nodes are replaced by a placeholder comment
        and destinations flagged by ``potentially_unsafe`` are not emitted.
    ``sourcepos``
        When truthy, tags built from ``self.attrs(node)`` carry a
        ``data-sourcepos`` attribute.
    """

    def __init__(self, options=None):
        """Create a renderer.

        :param options: optional mapping of rendering options. The mapping
            is copied, so the caller's object is never modified and no
            state is shared between renderer instances.
        """
        # A mutable ``{}`` default that is written to would be shared by
        # every instance created without arguments; copy instead.
        opts = dict(options) if options else {}

        # By default, soft breaks are rendered as newlines in HTML.
        # Set to "<br />" to make them hard breaks.
        # Set to " " if you want to ignore line wrapping in source.
        opts['softbreak'] = opts.get('softbreak') or '\n'

        # Greater than zero while inside an image, see image().
        self.disable_tags = 0
        # Initialised here so tag()/lit() are usable before render().
        self.buf = ''
        self.last_out = '\n'
        self.options = opts

    def escape(self, text):
        """Return ``text`` passed through ``escape_xml``."""
        return escape_xml(text)

    def tag(self, name, attrs=None, selfclosing=None):
        """Append an HTML tag to the buffer.

        Does nothing while ``self.disable_tags`` is greater than zero.

        :param name: tag name; pass e.g. ``'/p'`` for a closing tag.
        :param attrs: optional sequence of ``[name, value]`` pairs. Values
            are written verbatim, so callers must escape them first.
        :param selfclosing: when truthy, the tag is closed with `` />``.
        """
        if self.disable_tags > 0:
            return

        parts = ['<', name]
        for attrib in attrs or ():
            parts.extend((' ', attrib[0], '="', attrib[1], '"'))
        if selfclosing:
            parts.append(' /')
        parts.append('>')

        self.buf += ''.join(parts)
        self.last_out = '>'

    # Node methods #

    def text(self, node, entering=None):
        """Emit the node's literal text, escaped."""
        self.out(node.literal)

    def softbreak(self, node=None, entering=None):
        """Emit the configured ``softbreak`` string."""
        self.lit(self.options['softbreak'])

    def linebreak(self, node=None, entering=None):
        """Emit ``<br />`` followed by a newline."""
        self.tag('br', [], True)
        self.cr()

    def link(self, node, entering):
        """Emit the opening or closing ``<a>`` tag.

        In safe mode the ``href`` attribute is omitted for destinations
        flagged by ``potentially_unsafe``; the tag itself is still written.
        """
        attrs = self.attrs(node)
        if entering:
            if not (self.options.get('safe') and
                    potentially_unsafe(node.destination)):
                attrs.append(['href', self.escape(node.destination)])

            if node.title:
                attrs.append(['title', self.escape(node.title)])

            self.tag('a', attrs)
        else:
            self.tag('/a')

    def image(self, node, entering):
        """Emit an ``<img>`` element whose ``alt`` text is the node content.

        While inside an image ``disable_tags`` is non-zero, so tag() emits
        nothing and only literal text ends up in the ``alt`` attribute.
        Nested images only adjust the counter.
        """
        if entering:
            if self.disable_tags == 0:
                if self.options.get('safe') and \
                        potentially_unsafe(node.destination):
                    self.lit('<img src="" alt="')
                else:
                    self.lit('<img src="' +
                             self.escape(node.destination) +
                             '" alt="')
            self.disable_tags += 1
        else:
            self.disable_tags -= 1
            if self.disable_tags == 0:
                if node.title:
                    self.lit('" title="' + self.escape(node.title))
                self.lit('" />')

    def emph(self, node, entering):
        """Emit ``<em>`` or ``</em>``."""
        self.tag('em' if entering else '/em')

    def strong(self, node, entering):
        """Emit ``<strong>`` or ``</strong>``."""
        self.tag('strong' if entering else '/strong')

    def paragraph(self, node, entering):
        """Emit ``<p>``/``</p>``, except inside a tight list.

        When the paragraph's grandparent is a list whose ``list_data``
        marks it as tight, no paragraph tags are written.
        """
        grandparent = node.parent.parent
        attrs = self.attrs(node)
        if grandparent is not None and grandparent.t == 'list':
            if grandparent.list_data['tight']:
                return

        if entering:
            self.cr()
            self.tag('p', attrs)
        else:
            self.tag('/p')
            self.cr()

    def heading(self, node, entering):
        """Emit ``<hN>``/``</hN>`` where N is ``node.level``."""
        tagname = 'h' + str(node.level)
        attrs = self.attrs(node)
        if entering:
            self.cr()
            self.tag(tagname, attrs)
        else:
            self.tag('/' + tagname)
            self.cr()

    def code(self, node, entering):
        """Emit inline code wrapped in ``<code>``."""
        self.tag('code')
        self.out(node.literal)
        self.tag('/code')

    def code_block(self, node, entering):
        """Emit ``<pre><code>`` with the escaped block content.

        The first word of ``node.info``, if any, becomes a
        ``language-...`` class on the ``<code>`` tag.
        """
        info_words = node.info.split() if node.info else []
        attrs = self.attrs(node)
        if len(info_words) > 0 and len(info_words[0]) > 0:
            attrs.append(['class', 'language-' +
                          self.escape(info_words[0])])

        self.cr()
        self.tag('pre')
        self.tag('code', attrs)
        self.out(node.literal)
        self.tag('/code')
        self.tag('/pre')
        self.cr()

    def thematic_break(self, node, entering):
        """Emit ``<hr />`` on its own line."""
        attrs = self.attrs(node)
        self.cr()
        self.tag('hr', attrs, True)
        self.cr()

    def block_quote(self, node, entering):
        """Emit ``<blockquote>``/``</blockquote>`` on their own lines."""
        attrs = self.attrs(node)
        if entering:
            self.cr()
            self.tag('blockquote', attrs)
            self.cr()
        else:
            self.cr()
            self.tag('/blockquote')
            self.cr()

    def list(self, node, entering):
        """Emit ``<ul>`` for bullet lists, ``<ol>`` otherwise.

        A ``start`` attribute is added when ``list_data['start']`` is set
        and differs from 1.
        """
        tagname = 'ul' if node.list_data['type'] == 'bullet' else 'ol'
        attrs = self.attrs(node)
        if entering:
            start = node.list_data['start']
            if start is not None and start != 1:
                attrs.append(['start', str(start)])

            self.cr()
            self.tag(tagname, attrs)
            self.cr()
        else:
            self.cr()
            self.tag('/' + tagname)
            self.cr()

    def item(self, node, entering):
        """Emit ``<li>``/``</li>``; the closing tag ends the line."""
        attrs = self.attrs(node)
        if entering:
            self.tag('li', attrs)
        else:
            self.tag('/li')
            self.cr()

    def html_inline(self, node, entering):
        """Emit raw inline HTML, or a placeholder comment in safe mode."""
        if self.options.get('safe'):
            self.lit('<!-- raw HTML omitted -->')
        else:
            self.lit(node.literal)

    def html_block(self, node, entering):
        """Emit a raw HTML block, or a placeholder comment in safe mode."""
        self.cr()
        if self.options.get('safe'):
            self.lit('<!-- raw HTML omitted -->')
        else:
            self.lit(node.literal)
        self.cr()

    def custom_inline(self, node, entering):
        """Emit the node's ``on_enter``/``on_exit`` text verbatim."""
        if entering and node.on_enter:
            self.lit(node.on_enter)
        elif (not entering) and node.on_exit:
            self.lit(node.on_exit)

    def custom_block(self, node, entering):
        """Emit ``on_enter``/``on_exit`` verbatim, on its own line."""
        self.cr()
        if entering and node.on_enter:
            self.lit(node.on_enter)
        elif (not entering) and node.on_exit:
            self.lit(node.on_exit)
        self.cr()

    # Helper methods #

    def out(self, s):
        """Append ``s`` to the buffer after escaping it."""
        self.lit(self.escape(s))

    def attrs(self, node):
        """Return the attribute list shared by most tags.

        :returns: a new list; it holds one ``data-sourcepos`` pair of the
            form ``"l1:c1-l2:c2"`` when the ``sourcepos`` option is set and
            the node has a source position, otherwise it is empty.
        """
        att = []
        if self.options.get('sourcepos'):
            pos = node.sourcepos
            if pos:
                att.append(['data-sourcepos', str(pos[0][0]) + ':' +
                            str(pos[0][1]) + '-' + str(pos[1][0]) + ':' +
                            str(pos[1][1])])

        return att

View file

@ -0,0 +1,43 @@
from __future__ import unicode_literals
class Renderer(object):
    """Base class for renderers that turn an AST into a string."""

    def render(self, ast):
        """Walk the AST and dispatch every event to a handler method.

        For each event the method named after the node's type (``node.t``)
        is called with the node and the ``entering`` flag. Node types
        without a matching attribute on the renderer are skipped.

        @param ast {Node} The root of the abstract syntax tree.
        @return {String} Everything the handlers wrote to the buffer.
        """
        walker = ast.walker()
        self.buf = ''
        self.last_out = '\n'

        while True:
            event = walker.nxt()
            if event is None:
                break
            type_ = event['node'].t
            if not hasattr(self, type_):
                continue
            handler = getattr(self, type_)
            handler(event['node'], event['entering'])

        return self.buf

    def lit(self, s):
        """Append a literal string to the buffer and remember it.

        @param s {String} The string to append.
        """
        self.buf, self.last_out = self.buf + s, s

    def cr(self):
        """Append a newline unless the last output was exactly a newline."""
        if self.last_out == '\n':
            return
        self.lit('\n')

    def out(self, s):
        """Append a string to the buffer, possibly escaping the content.

        The base implementation writes the string unchanged; concrete
        renderer implementations should override this method.

        @param s {String} The string to append.
        """
        self.lit(s)

View file

@ -0,0 +1,159 @@
from __future__ import unicode_literals
from commonmark.render.renderer import Renderer
class ReStructuredTextRenderer(Renderer):
    """
    Render reStructuredText from Markdown

    Example:

    .. code:: python

        import commonmark

        parser = commonmark.Parser()
        ast = parser.parse('Hello `inline code` example')

        renderer = commonmark.ReStructuredTextRenderer()
        rst = renderer.render(ast)
        print(rst)  # Hello ``inline code`` example
    """

    # Adornment characters for heading levels 1..6, in order.
    _HEADING_CHARS = ('#', '*', '=', '-', '^', '"')

    def __init__(self, indent_char=' '):
        """Create a renderer.

        :param indent_char: string repeated ``indent_length`` times to
            indent lines inside block quotes and image directives.
        """
        self.indent_char = indent_char
        self.indent_length = 0
        # Initialised here so lit()/cr() are usable before render().
        self.buf = ''
        self.last_out = '\n'

    def lit(self, s):
        """Append ``s``, prefixed with the current indent at line start."""
        if s == '\n':
            indent = ''  # Avoid whitespace if we're just adding a newline
        elif self.last_out != '\n':
            indent = ''  # Don't indent if we're in the middle of a line
        else:
            indent = self.indent_char * self.indent_length

        return super(ReStructuredTextRenderer, self).lit(indent + s)

    def cr(self):
        """Append a newline unconditionally."""
        self.lit('\n')

    def indent_lines(self, literal, indent_length=4):
        """Return ``literal`` with every line indented.

        :param literal: text to indent; it is split with ``splitlines()``.
        :param indent_length: number of ``indent_char`` repetitions.
        :returns: the indented lines joined with newlines (no trailing
            newline).
        """
        indent = self.indent_char * indent_length
        return '\n'.join(indent + line for line in literal.splitlines())

    def _current_line_length(self):
        """Return the length of the unfinished last line in the buffer.

        The indent that lit() put at the start of the line is not counted.
        """
        line = self.buf[self.buf.rfind('\n') + 1:]
        indent = self.indent_char * self.indent_length
        if indent and line.startswith(indent):
            line = line[len(indent):]
        return len(line)

    # Nodes

    def document(self, node, entering):
        """The document node produces no output of its own."""
        pass

    def softbreak(self, node, entering):
        """Emit a newline."""
        self.cr()

    def linebreak(self, node, entering):
        """Emit two newlines."""
        self.cr()
        self.cr()

    def text(self, node, entering):
        """Emit the node's literal text."""
        self.out(node.literal)

    def emph(self, node, entering):
        """Emit ``*`` on both entering and leaving."""
        self.out('*')

    def strong(self, node, entering):
        """Emit ``**`` on both entering and leaving."""
        self.out('**')

    def paragraph(self, node, entering):
        """Emit a newline around paragraphs, except directly in an item."""
        if node.parent.t != 'item':
            self.cr()

    def link(self, node, entering):
        """Emit the link as ```text <destination>`_``."""
        if entering:
            self.out('`')
        else:
            self.out(' <%s>`_' % node.destination)

    def image(self, node, entering):
        """Emit an ``.. image::`` directive with an ``:alt:`` option.

        The node's children are rendered after ``:alt: `` with the indent
        raised by four; the indent is restored on leaving.
        """
        directive = '.. image:: ' + node.destination

        if entering:
            self.out(directive)
            self.cr()
            self.indent_length += 4
            self.out(':alt: ')
        else:
            self.indent_length -= 4

    def code(self, node, entering):
        """Emit inline code wrapped in double backticks."""
        self.out('``')
        self.out(node.literal)
        self.out('``')

    def code_block(self, node, entering):
        """Emit a ``.. code::`` directive followed by the indented block.

        The first word of ``node.info``, if any, is used as the language.
        """
        directive = '.. code::'
        language_name = None

        info_words = node.info.split() if node.info else []
        if len(info_words) > 0 and len(info_words[0]) > 0:
            language_name = info_words[0]

        if language_name:
            directive += ' ' + language_name

        self.cr()
        self.out(directive)
        self.cr()
        self.cr()
        self.out(self.indent_lines(node.literal))
        self.cr()

    def list(self, node, entering):
        """Emit a newline before the list."""
        if entering:
            self.cr()

    def item(self, node, entering):
        """Emit ``* `` for bullet items and ``#. `` otherwise."""
        tagname = '*' if node.list_data['type'] == 'bullet' else '#.'

        if entering:
            self.out(tagname + ' ')
        else:
            self.cr()

    def block_quote(self, node, entering):
        """Raise the indent by four for the duration of the quote."""
        if entering:
            self.indent_length += 4
        else:
            self.indent_length -= 4

    def heading(self, node, entering):
        """Emit the heading text followed by an underline.

        The underline character is chosen by ``node.level``. Its length is
        taken from the heading line already written to the buffer, so it
        matches the rendered text, including inline markup. Empty headings
        and headings whose first child is not plain text are handled too.
        If the heading spans several output lines, only the last one is
        measured.
        """
        try:
            heading_char = self._HEADING_CHARS[node.level - 1]
        except IndexError:
            # Default to the last level if we're in too deep
            heading_char = self._HEADING_CHARS[-1]

        if entering:
            self.cr()
        else:
            # Measure before cr() ends the heading line.
            banner = heading_char * self._current_line_length()
            self.cr()
            self.out(banner)
            self.cr()