init
commit 38355d2442
9083 changed files with 1225834 additions and 0 deletions

8  .venv/lib/python3.8/site-packages/commonmark/__init__.py  Normal file
@@ -0,0 +1,8 @@
# flake8: noqa
from __future__ import unicode_literals, absolute_import

from commonmark.main import commonmark
from commonmark.dump import dumpAST, dumpJSON
from commonmark.blocks import Parser
from commonmark.render.html import HtmlRenderer
from commonmark.render.rst import ReStructuredTextRenderer
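The imports above are the package's public API. A minimal usage sketch (the sample string is illustrative; the calls are the same ones the bundled cmark.py entry point further down uses):

    import commonmark

    # One-shot helper: Markdown in, HTML out.
    html = commonmark.commonmark("Hello *world*")

    # Equivalent two-step flow: build an AST, then render it.
    parser = commonmark.Parser()
    ast = parser.parse("Hello *world*")
    html = commonmark.HtmlRenderer().render(ast)

    # The AST can also be pretty-printed or serialized to JSON.
    commonmark.dumpAST(ast)
    json_text = commonmark.dumpJSON(ast)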
10 binary files not shown.
908  .venv/lib/python3.8/site-packages/commonmark/blocks.py  Normal file
@@ -0,0 +1,908 @@
from __future__ import absolute_import, unicode_literals

import re
from commonmark import common
from commonmark.common import unescape_string
from commonmark.inlines import InlineParser
from commonmark.node import Node


CODE_INDENT = 4

reHtmlBlockOpen = [
    re.compile(r'.'),  # dummy for 0
    re.compile(r'^<(?:script|pre|style)(?:\s|>|$)', re.IGNORECASE),
    re.compile(r'^<!--'),
    re.compile(r'^<[?]'),
    re.compile(r'^<![A-Z]'),
    re.compile(r'^<!\[CDATA\['),
    re.compile(
        r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|'
        r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|'
        r'fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|'
        r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|'
        r'nav|noframes|ol|optgroup|option|p|param|section|source|title|'
        r'summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
        r'(?:\s|[/]?[>]|$)',
        re.IGNORECASE),
    re.compile(
        '^(?:' + common.OPENTAG + '|' + common.CLOSETAG + ')\\s*$',
        re.IGNORECASE),
]
reHtmlBlockClose = [
    re.compile(r'.'),  # dummy for 0
    re.compile(r'<\/(?:script|pre|style)>', re.IGNORECASE),
    re.compile(r'-->'),
    re.compile(r'\?>'),
    re.compile(r'>'),
    re.compile(r'\]\]>'),
]
reThematicBreak = re.compile(
    r'^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$')
reMaybeSpecial = re.compile(r'^[#`~*+_=<>0-9-]')
reNonSpace = re.compile(r'[^ \t\f\v\r\n]')
reBulletListMarker = re.compile(r'^[*+-]')
reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])')
reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')
reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}')
reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')
reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')
reLineEnding = re.compile(r'\r\n|\n|\r')


def is_blank(s):
    """Returns True if string contains only space characters."""
    return re.search(reNonSpace, s) is None


def is_space_or_tab(s):
    return s in (' ', '\t')


def peek(ln, pos):
    if pos < len(ln):
        return ln[pos]
    else:
        return None


def ends_with_blank_line(block):
    """ Returns true if block ends with a blank line,
    descending if needed into lists and sublists."""
    while block:
        if block.last_line_blank:
            return True
        if not block.last_line_checked and \
                block.t in ('list', 'item'):
            block.last_line_checked = True
            block = block.last_child
        else:
            block.last_line_checked = True
            break

    return False


def parse_list_marker(parser, container):
    """ Parse a list marker and return data on the marker (type,
    start, delimiter, bullet character, padding) or None."""
    rest = parser.current_line[parser.next_nonspace:]
    data = {
        'type': None,
        'tight': True,  # lists are tight by default
        'bullet_char': None,
        'start': None,
        'delimiter': None,
        'padding': None,
        'marker_offset': parser.indent,
    }
    if parser.indent >= 4:
        return None
    m = re.search(reBulletListMarker, rest)
    m2 = re.search(reOrderedListMarker, rest)
    if m:
        data['type'] = 'bullet'
        data['bullet_char'] = m.group()[0]
    elif m2 and (container.t != 'paragraph' or m2.group(1) == '1'):
        m = m2
        data['type'] = 'ordered'
        data['start'] = int(m.group(1))
        data['delimiter'] = m.group(2)
    else:
        return None

    # make sure we have spaces after
    nextc = peek(parser.current_line, parser.next_nonspace + len(m.group()))
    if not (nextc is None or nextc == '\t' or nextc == ' '):
        return None

    # if it interrupts paragraph, make sure first line isn't blank
    if container.t == 'paragraph' and \
            not re.search(
                reNonSpace,
                parser.current_line[parser.next_nonspace + len(m.group()):]):
        return None

    # we've got a match! advance offset and calculate padding
    parser.advance_next_nonspace()  # to start of marker
    parser.advance_offset(len(m.group()), True)  # to end of marker
    spaces_start_col = parser.column
    spaces_start_offset = parser.offset
    while True:
        parser.advance_offset(1, True)
        nextc = peek(parser.current_line, parser.offset)
        if parser.column - spaces_start_col < 5 and \
                is_space_or_tab(nextc):
            pass
        else:
            break
    blank_item = peek(parser.current_line, parser.offset) is None
    spaces_after_marker = parser.column - spaces_start_col
    if spaces_after_marker >= 5 or \
            spaces_after_marker < 1 or \
            blank_item:
        data['padding'] = len(m.group()) + 1
        parser.column = spaces_start_col
        parser.offset = spaces_start_offset
        if is_space_or_tab(peek(parser.current_line, parser.offset)):
            parser.advance_offset(1, True)
    else:
        data['padding'] = len(m.group()) + spaces_after_marker

    return data


def lists_match(list_data, item_data):
    """
    Returns True if the two list items are of the same type,
    with the same delimiter and bullet character. This is used
    in agglomerating list items into lists.
    """
    return list_data.get('type') == item_data.get('type') and \
        list_data.get('delimiter') == item_data.get('delimiter') and \
        list_data.get('bullet_char') == item_data.get('bullet_char')


class Block(object):
    accepts_lines = None

    @staticmethod
    def continue_(parser=None, container=None):
        return

    @staticmethod
    def finalize(parser=None, block=None):
        return

    @staticmethod
    def can_contain(t):
        return


class Document(Block):
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        return

    @staticmethod
    def can_contain(t):
        return t != 'item'


class List(Block):
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        item = block.first_child
        while item:
            # check for non-final list item ending with blank line:
            if ends_with_blank_line(item) and item.nxt:
                block.list_data['tight'] = False
                break
            # recurse into children of list item, to see if there are
            # spaces between any of them:
            subitem = item.first_child
            while subitem:
                if ends_with_blank_line(subitem) and \
                        (item.nxt or subitem.nxt):
                    block.list_data['tight'] = False
                    break
                subitem = subitem.nxt
            item = item.nxt

    @staticmethod
    def can_contain(t):
        return t == 'item'


class BlockQuote(Block):
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        ln = parser.current_line
        if not parser.indented and peek(ln, parser.next_nonspace) == '>':
            parser.advance_next_nonspace()
            parser.advance_offset(1, False)
            if is_space_or_tab(peek(ln, parser.offset)):
                parser.advance_offset(1, True)
        else:
            return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        return

    @staticmethod
    def can_contain(t):
        return t != 'item'


class Item(Block):
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        if parser.blank:
            if container.first_child is None:
                # Blank line after empty list item
                return 1
            else:
                parser.advance_next_nonspace()
        elif parser.indent >= (container.list_data['marker_offset'] +
                               container.list_data['padding']):
            parser.advance_offset(
                container.list_data['marker_offset'] +
                container.list_data['padding'], True)
        else:
            return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        return

    @staticmethod
    def can_contain(t):
        return t != 'item'


class Heading(Block):
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        # A heading can never contain more than one line, so fail to match:
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        return

    @staticmethod
    def can_contain(t):
        return False


class ThematicBreak(Block):
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        # A thematic break can never contain more than one line,
        # so fail to match:
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        return

    @staticmethod
    def can_contain(t):
        return False


class CodeBlock(Block):
    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        ln = parser.current_line
        indent = parser.indent
        if container.is_fenced:
            match = indent <= 3 and \
                len(ln) >= parser.next_nonspace + 1 and \
                ln[parser.next_nonspace] == container.fence_char and \
                re.search(reClosingCodeFence, ln[parser.next_nonspace:])
            if match and len(match.group()) >= container.fence_length:
                # closing fence - we're at end of line, so we can return
                parser.finalize(container, parser.line_number)
                return 2
            else:
                # skip optional spaces of fence offset
                i = container.fence_offset
                while i > 0 and is_space_or_tab(peek(ln, parser.offset)):
                    parser.advance_offset(1, True)
                    i -= 1
        else:
            # indented
            if indent >= CODE_INDENT:
                parser.advance_offset(CODE_INDENT, True)
            elif parser.blank:
                parser.advance_next_nonspace()
            else:
                return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        if block.is_fenced:
            # first line becomes info string
            content = block.string_content
            newline_pos = content.index('\n')
            first_line = content[0:newline_pos]
            rest = content[newline_pos + 1:]
            block.info = unescape_string(first_line.strip())
            block.literal = rest
        else:
            # indented
            block.literal = re.sub(r'(\n *)+$', '\n', block.string_content)

        block.string_content = None

    @staticmethod
    def can_contain(t):
        return False


class HtmlBlock(Block):
    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        if parser.blank and (container.html_block_type == 6 or
                             container.html_block_type == 7):
            return 1
        else:
            return 0

    @staticmethod
    def finalize(parser=None, block=None):
        block.literal = re.sub(r'(\n *)+$', '', block.string_content)
        # allow GC
        block.string_content = None

    @staticmethod
    def can_contain(t):
        return False


class Paragraph(Block):
    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        return 1 if parser.blank else 0

    @staticmethod
    def finalize(parser=None, block=None):
        has_reference_defs = False

        # try parsing the beginning as link reference definitions:
        while peek(block.string_content, 0) == '[':
            pos = parser.inline_parser.parseReference(
                block.string_content, parser.refmap)
            if not pos:
                break
            block.string_content = block.string_content[pos:]
            has_reference_defs = True
        if has_reference_defs and is_blank(block.string_content):
            block.unlink()

    @staticmethod
    def can_contain(t):
        return False


class BlockStarts(object):
    """Block start functions.

    Return values:
    0 = no match
    1 = matched container, keep going
    2 = matched leaf, no more block starts
    """
    METHODS = [
        'block_quote',
        'atx_heading',
        'fenced_code_block',
        'html_block',
        'setext_heading',
        'thematic_break',
        'list_item',
        'indented_code_block',
    ]

    @staticmethod
    def block_quote(parser, container=None):
        if not parser.indented and \
                peek(parser.current_line, parser.next_nonspace) == '>':
            parser.advance_next_nonspace()
            parser.advance_offset(1, False)
            # optional following space
            if is_space_or_tab(peek(parser.current_line, parser.offset)):
                parser.advance_offset(1, True)
            parser.close_unmatched_blocks()
            parser.add_child('block_quote', parser.next_nonspace)
            return 1

        return 0

    @staticmethod
    def atx_heading(parser, container=None):
        if not parser.indented:
            m = re.search(reATXHeadingMarker,
                          parser.current_line[parser.next_nonspace:])
            if m:
                parser.advance_next_nonspace()
                parser.advance_offset(len(m.group()), False)
                parser.close_unmatched_blocks()
                container = parser.add_child('heading', parser.next_nonspace)
                # number of #s
                container.level = len(m.group().strip())
                # remove trailing ###s:
                container.string_content = re.sub(
                    r'[ \t]+#+[ \t]*$', '', re.sub(
                        r'^[ \t]*#+[ \t]*$',
                        '',
                        parser.current_line[parser.offset:]))
                parser.advance_offset(
                    len(parser.current_line) - parser.offset, False)
                return 2

        return 0

    @staticmethod
    def fenced_code_block(parser, container=None):
        if not parser.indented:
            m = re.search(
                reCodeFence,
                parser.current_line[parser.next_nonspace:])
            if m:
                fence_length = len(m.group())
                parser.close_unmatched_blocks()
                container = parser.add_child(
                    'code_block', parser.next_nonspace)
                container.is_fenced = True
                container.fence_length = fence_length
                container.fence_char = m.group()[0]
                container.fence_offset = parser.indent
                parser.advance_next_nonspace()
                parser.advance_offset(fence_length, False)
                return 2

        return 0

    @staticmethod
    def html_block(parser, container=None):
        if not parser.indented and \
                peek(parser.current_line, parser.next_nonspace) == '<':
            s = parser.current_line[parser.next_nonspace:]

            for block_type in range(1, 8):
                if re.search(reHtmlBlockOpen[block_type], s) and \
                        (block_type < 7 or container.t != 'paragraph'):
                    parser.close_unmatched_blocks()
                    # We don't adjust parser.offset;
                    # spaces are part of the HTML block:
                    b = parser.add_child('html_block', parser.offset)
                    b.html_block_type = block_type
                    return 2
        return 0

    @staticmethod
    def setext_heading(parser, container=None):
        if not parser.indented and container.t == 'paragraph':
            m = re.search(
                reSetextHeadingLine,
                parser.current_line[parser.next_nonspace:])
            if m:
                parser.close_unmatched_blocks()
                # resolve reference link definitions
                while peek(container.string_content, 0) == '[':
                    pos = parser.inline_parser.parseReference(
                        container.string_content, parser.refmap)
                    if not pos:
                        break
                    container.string_content = container.string_content[pos:]
                if container.string_content:
                    heading = Node('heading', container.sourcepos)
                    heading.level = 1 if m.group()[0] == '=' else 2
                    heading.string_content = container.string_content
                    container.insert_after(heading)
                    container.unlink()
                    parser.tip = heading
                    parser.advance_offset(
                        len(parser.current_line) - parser.offset, False)
                    return 2
                else:
                    return 0

        return 0

    @staticmethod
    def thematic_break(parser, container=None):
        if not parser.indented and re.search(
                reThematicBreak, parser.current_line[parser.next_nonspace:]):
            parser.close_unmatched_blocks()
            parser.add_child('thematic_break', parser.next_nonspace)
            parser.advance_offset(
                len(parser.current_line) - parser.offset, False)
            return 2
        return 0

    @staticmethod
    def list_item(parser, container=None):
        if (not parser.indented or container.t == 'list'):
            data = parse_list_marker(parser, container)
            if data:
                parser.close_unmatched_blocks()

                # add the list if needed
                if parser.tip.t != 'list' or \
                        not lists_match(container.list_data, data):
                    container = parser.add_child('list', parser.next_nonspace)
                    container.list_data = data

                # add the list item
                container = parser.add_child('item', parser.next_nonspace)
                container.list_data = data
                return 1

        return 0

    @staticmethod
    def indented_code_block(parser, container=None):
        if parser.indented and \
                parser.tip.t != 'paragraph' and \
                not parser.blank:
            # indented code
            parser.advance_offset(CODE_INDENT, True)
            parser.close_unmatched_blocks()
            parser.add_child('code_block', parser.offset)
            return 2

        return 0


class Parser(object):
    def __init__(self, options={}):
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.block_starts = BlockStarts()
        self.tip = self.doc
        self.oldtip = self.doc
        self.current_line = ''
        self.line_number = 0
        self.offset = 0
        self.column = 0
        self.next_nonspace = 0
        self.next_nonspace_column = 0
        self.indent = 0
        self.indented = False
        self.blank = False
        self.partially_consumed_tab = False
        self.all_closed = True
        self.last_matched_container = self.doc
        self.refmap = {}
        self.last_line_length = 0
        self.inline_parser = InlineParser(options)
        self.options = options

    def add_line(self):
        """ Add a line to the block at the tip. We assume the tip
        can accept lines -- that check should be done before calling this."""
        if self.partially_consumed_tab:
            # Skip over tab
            self.offset += 1
            # Add space characters
            chars_to_tab = 4 - (self.column % 4)
            self.tip.string_content += (' ' * chars_to_tab)
        self.tip.string_content += (self.current_line[self.offset:] + '\n')

    def add_child(self, tag, offset):
        """ Add block of type tag as a child of the tip. If the tip can't
        accept children, close and finalize it and try its parent,
        and so on until we find a block that can accept children."""
        while not self.blocks[self.tip.t].can_contain(tag):
            self.finalize(self.tip, self.line_number - 1)

        column_number = offset + 1
        new_block = Node(tag, [[self.line_number, column_number], [0, 0]])
        new_block.string_content = ''
        self.tip.append_child(new_block)
        self.tip = new_block
        return new_block

    def close_unmatched_blocks(self):
        """Finalize and close any unmatched blocks."""
        if not self.all_closed:
            while self.oldtip != self.last_matched_container:
                parent = self.oldtip.parent
                self.finalize(self.oldtip, self.line_number - 1)
                self.oldtip = parent
            self.all_closed = True

    def find_next_nonspace(self):
        current_line = self.current_line
        i = self.offset
        cols = self.column

        try:
            c = current_line[i]
        except IndexError:
            c = ''
        while c != '':
            if c == ' ':
                i += 1
                cols += 1
            elif c == '\t':
                i += 1
                cols += (4 - (cols % 4))
            else:
                break

            try:
                c = current_line[i]
            except IndexError:
                c = ''

        self.blank = (c == '\n' or c == '\r' or c == '')
        self.next_nonspace = i
        self.next_nonspace_column = cols
        self.indent = self.next_nonspace_column - self.column
        self.indented = self.indent >= CODE_INDENT

    def advance_next_nonspace(self):
        self.offset = self.next_nonspace
        self.column = self.next_nonspace_column
        self.partially_consumed_tab = False

    def advance_offset(self, count, columns):
        current_line = self.current_line
        try:
            c = current_line[self.offset]
        except IndexError:
            c = None
        while count > 0 and c is not None:
            if c == '\t':
                chars_to_tab = 4 - (self.column % 4)
                if columns:
                    self.partially_consumed_tab = chars_to_tab > count
                    chars_to_advance = min(count, chars_to_tab)
                    self.column += chars_to_advance
                    self.offset += 0 if self.partially_consumed_tab else 1
                    count -= chars_to_advance
                else:
                    self.partially_consumed_tab = False
                    self.column += chars_to_tab
                    self.offset += 1
                    count -= 1
            else:
                self.partially_consumed_tab = False
                self.offset += 1
                # assume ascii; block starts are ascii
                self.column += 1
                count -= 1
            try:
                c = current_line[self.offset]
            except IndexError:
                c = None

    def incorporate_line(self, ln):
        """Analyze a line of text and update the document appropriately.

        We parse markdown text by calling this on each line of input,
        then finalizing the document.
        """
        all_matched = True

        container = self.doc
        self.oldtip = self.tip
        self.offset = 0
        self.column = 0
        self.blank = False
        self.partially_consumed_tab = False
        self.line_number += 1

        # replace NUL characters for security
        if re.search(r'\u0000', ln) is not None:
            ln = re.sub(r'\0', '\uFFFD', ln)

        self.current_line = ln

        # For each containing block, try to parse the associated line start.
        # Bail out on failure: container will point to the last matching block.
        # Set all_matched to false if not all containers match.
        while True:
            last_child = container.last_child
            if not (last_child and last_child.is_open):
                break
            container = last_child

            self.find_next_nonspace()

            rv = self.blocks[container.t].continue_(self, container)
            if rv == 0:
                # we've matched, keep going
                pass
            elif rv == 1:
                # we've failed to match a block
                all_matched = False
            elif rv == 2:
                # we've hit end of line for fenced code close and can return
                self.last_line_length = len(ln)
                return
            else:
                raise ValueError(
                    'continue_ returned illegal value, must be 0, 1, or 2')

            if not all_matched:
                # back up to last matching block
                container = container.parent
                break

        self.all_closed = (container == self.oldtip)
        self.last_matched_container = container

        matched_leaf = container.t != 'paragraph' and \
            self.blocks[container.t].accepts_lines
        starts = self.block_starts
        starts_len = len(starts.METHODS)
        # Unless last matched container is a code block, try new container
        # starts, adding children to the last matched container:
        while not matched_leaf:
            self.find_next_nonspace()

            # this is a little performance optimization:
            if not self.indented and \
                    not re.search(reMaybeSpecial, ln[self.next_nonspace:]):
                self.advance_next_nonspace()
                break

            i = 0
            while i < starts_len:
                res = getattr(starts, starts.METHODS[i])(self, container)
                if res == 1:
                    container = self.tip
                    break
                elif res == 2:
                    container = self.tip
                    matched_leaf = True
                    break
                else:
                    i += 1

            if i == starts_len:
                # nothing matched
                self.advance_next_nonspace()
                break

        # What remains at the offset is a text line. Add the text to the
        # appropriate container.
        if not self.all_closed and not self.blank and \
                self.tip.t == 'paragraph':
            # lazy paragraph continuation
            self.add_line()
        else:
            # not a lazy continuation
            # finalize any blocks not matched
            self.close_unmatched_blocks()
            if self.blank and container.last_child:
                container.last_child.last_line_blank = True

            t = container.t

            # Block quote lines are never blank as they start with >
            # and we don't count blanks in fenced code for purposes of
            # tight/loose lists or breaking out of lists. We also
            # don't set last_line_blank on an empty list item, or if we
            # just closed a fenced block.
            last_line_blank = self.blank and \
                not (t == 'block_quote' or
                     (t == 'code_block' and container.is_fenced) or
                     (t == 'item' and
                      not container.first_child and
                      container.sourcepos[0][0] == self.line_number))

            # propagate last_line_blank up through parents:
            cont = container
            while cont:
                cont.last_line_blank = last_line_blank
                cont = cont.parent

            if self.blocks[t].accepts_lines:
                self.add_line()
                # if HtmlBlock, check for end condition
                if t == 'html_block' and \
                        container.html_block_type >= 1 and \
                        container.html_block_type <= 5 and \
                        re.search(
                            reHtmlBlockClose[container.html_block_type],
                            self.current_line[self.offset:]):
                    self.finalize(container, self.line_number)
            elif self.offset < len(ln) and not self.blank:
                # create a paragraph container for one line
                container = self.add_child('paragraph', self.offset)
                self.advance_next_nonspace()
                self.add_line()

        self.last_line_length = len(ln)

    def finalize(self, block, line_number):
        """ Finalize a block. Close it and do any necessary postprocessing,
        e.g. creating string_content from strings, setting the 'tight'
        or 'loose' status of a list, and parsing the beginnings
        of paragraphs for reference definitions. Reset the tip to the
        parent of the closed block."""
        above = block.parent
        block.is_open = False
        block.sourcepos[1] = [line_number, self.last_line_length]

        self.blocks[block.t].finalize(self, block)

        self.tip = above

    def process_inlines(self, block):
        """
        Walk through a block & children recursively, parsing string content
        into inline content where appropriate.
        """
        walker = block.walker()
        self.inline_parser.refmap = self.refmap
        self.inline_parser.options = self.options
        event = walker.nxt()
        while event is not None:
            node = event['node']
            t = node.t
            if not event['entering'] and (t == 'paragraph' or t == 'heading'):
                self.inline_parser.parse(node)
            event = walker.nxt()

    def parse(self, my_input):
        """ The main parsing function. Returns a parsed document AST."""
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.tip = self.doc
        self.refmap = {}
        self.line_number = 0
        self.last_line_length = 0
        self.offset = 0
        self.column = 0
        self.last_matched_container = self.doc
        self.current_line = ''
        lines = re.split(reLineEnding, my_input)
        length = len(lines)
        if len(my_input) > 0 and my_input[-1] == '\n':
            # ignore last blank line created by final newline
            length -= 1
        for i in range(length):
            self.incorporate_line(lines[i])
        while (self.tip):
            self.finalize(self.tip, length)
        self.process_inlines(self.doc)
        return self.doc


CAMEL_RE = re.compile("(.)([A-Z](?:[a-z]+|(?<=[a-z0-9].)))")
Parser.blocks = dict(
    (CAMEL_RE.sub(r'\1_\2', cls.__name__).lower(), cls)
    for cls in Block.__subclasses__())
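The dict comprehension at the end of blocks.py builds the Parser.blocks registry that incorporate_line, add_child, and finalize dispatch through: CAMEL_RE turns each Block subclass name into the snake_case node type used throughout the parser. A small illustration (keys worked out by hand from the classes defined above):

    # Document      -> Parser.blocks['document']
    # BlockQuote    -> Parser.blocks['block_quote']
    # ThematicBreak -> Parser.blocks['thematic_break']
    # CodeBlock     -> Parser.blocks['code_block']
    # HtmlBlock     -> Parser.blocks['html_block']
    # so self.blocks[container.t].continue_(...) resolves a node type
    # such as 'block_quote' to the BlockQuote class.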
53  .venv/lib/python3.8/site-packages/commonmark/cmark.py  Normal file
@@ -0,0 +1,53 @@
#!/usr/bin/env python
from __future__ import unicode_literals
import argparse
import sys
import commonmark


def main():
    parser = argparse.ArgumentParser(
        description="Process Markdown according to "
        "the CommonMark specification.")
    if sys.version_info < (3, 0):
        reload(sys)  # noqa
        sys.setdefaultencoding('utf-8')
    parser.add_argument(
        'infile',
        nargs="?",
        type=argparse.FileType('r'),
        default=sys.stdin,
        help="Input Markdown file to parse, defaults to STDIN")
    parser.add_argument(
        '-o',
        nargs="?",
        type=argparse.FileType('w'),
        default=sys.stdout,
        help="Output HTML/JSON file, defaults to STDOUT")
    parser.add_argument('-a', action="store_true", help="Print formatted AST")
    parser.add_argument('-aj', action="store_true", help="Output JSON AST")
    args = parser.parse_args()
    parser = commonmark.Parser()
    f = args.infile
    o = args.o
    lines = []
    for line in f:
        lines.append(line)
    data = "".join(lines)
    ast = parser.parse(data)
    if not args.a and not args.aj:
        renderer = commonmark.HtmlRenderer()
        o.write(renderer.render(ast))
        exit()
    if args.a:
        # print ast
        commonmark.dumpAST(ast)
        exit()

    # o.write(ast.to_JSON())
    o.write(commonmark.dumpJSON(ast))
    exit()


if __name__ == '__main__':
    main()
113  .venv/lib/python3.8/site-packages/commonmark/common.py  Normal file
@@ -0,0 +1,113 @@
from __future__ import absolute_import, unicode_literals

import re
import sys

try:
    from urllib.parse import quote
except ImportError:
    from urllib import quote

if sys.version_info >= (3, 0):
    if sys.version_info >= (3, 4):
        import html
        HTMLunescape = html.unescape
    else:
        from .entitytrans import _unescape
        HTMLunescape = _unescape
else:
    from commonmark import entitytrans
    HTMLunescape = entitytrans._unescape

ENTITY = '&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});'

TAGNAME = '[A-Za-z][A-Za-z0-9-]*'
ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'
UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"
SINGLEQUOTEDVALUE = "'[^']*'"
DOUBLEQUOTEDVALUE = '"[^"]*"'
ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + \
    "|" + DOUBLEQUOTEDVALUE + ")"
ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"
ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"
OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"
CLOSETAG = "</" + TAGNAME + "\\s*[>]"
HTMLCOMMENT = '<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->'
PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"
DECLARATION = "<![A-Z]+" + "\\s+[^>]*>"
CDATA = '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>'
HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + \
    PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"
reHtmlTag = re.compile('^' + HTMLTAG, re.IGNORECASE)
reBackslashOrAmp = re.compile(r'[\\&]')
ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'
reEntityOrEscapedChar = re.compile(
    '\\\\' + ESCAPABLE + '|' + ENTITY, re.IGNORECASE)
XMLSPECIAL = '[&<>"]'
reXmlSpecial = re.compile(XMLSPECIAL)


def unescape_char(s):
    if s[0] == '\\':
        return s[1]
    else:
        return HTMLunescape(s)


def unescape_string(s):
    """Replace entities and backslash escapes with literal characters."""
    if re.search(reBackslashOrAmp, s):
        return re.sub(
            reEntityOrEscapedChar,
            lambda m: unescape_char(m.group()),
            s)
    else:
        return s


def normalize_uri(uri):
    try:
        return quote(uri.encode('utf-8'), safe=str('/@:+?=&()%#*,'))
    except UnicodeDecodeError:
        # Python 2 also throws a UnicodeDecodeError, complaining about
        # the width of the "safe" string. Removing this parameter
        # solves the issue, but yields overly aggressive quoting, but we
        # can correct those errors manually.
        s = quote(uri.encode('utf-8'))
        s = re.sub(r'%40', '@', s)
        s = re.sub(r'%3A', ':', s)
        s = re.sub(r'%2B', '+', s)
        s = re.sub(r'%3F', '?', s)
        s = re.sub(r'%3D', '=', s)
        s = re.sub(r'%26', '&', s)
        s = re.sub(r'%28', '(', s)
        s = re.sub(r'%29', ')', s)
        s = re.sub(r'%25', '%', s)
        s = re.sub(r'%23', '#', s)
        s = re.sub(r'%2A', '*', s)
        s = re.sub(r'%2C', ',', s)
        return s


UNSAFE_MAP = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
}


def replace_unsafe_char(s):
    return UNSAFE_MAP.get(s, s)


def escape_xml(s):
    if s is None:
        return ''
    if re.search(reXmlSpecial, s):
        return re.sub(
            reXmlSpecial,
            lambda m: replace_unsafe_char(m.group()),
            s)
    else:
        return s
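A quick sketch of how the two escaping helpers above behave (expected values worked out from reEntityOrEscapedChar and UNSAFE_MAP; the sample strings are illustrative):

    from commonmark.common import escape_xml, unescape_string

    unescape_string('\\*foo &amp; bar')   # -> '*foo & bar'
    escape_xml('<a href="x">')            # -> '&lt;a href=&quot;x&quot;&gt;'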
108  .venv/lib/python3.8/site-packages/commonmark/dump.py  Normal file
@@ -0,0 +1,108 @@
from __future__ import absolute_import, unicode_literals

from builtins import str
import json
from commonmark.node import is_container


def prepare(obj, topnode=False):
    """Walk the complete AST, only returning needed data.

    This removes circular references and allows us to output
    JSON.
    """
    a = []
    for subnode, entered in obj.walker():
        rep = {
            'type': subnode.t,
        }
        if subnode.literal:
            rep['literal'] = subnode.literal

        if subnode.string_content:
            rep['string_content'] = subnode.string_content

        if subnode.title:
            rep['title'] = subnode.title

        if subnode.info:
            rep['info'] = subnode.info

        if subnode.destination:
            rep['destination'] = subnode.destination

        if subnode.list_data:
            rep['list_data'] = subnode.list_data

        if is_container(subnode):
            rep['children'] = []

        if entered and len(a) > 0:
            if a[-1]['children']:
                a[-1]['children'].append(rep)
            else:
                a[-1]['children'] = [rep]
        else:
            a.append(rep)
    return a


def dumpJSON(obj):
    """Output AST in JSON form, this is destructive of block."""
    prepared = prepare(obj)
    return json.dumps(prepared, indent=4, sort_keys=True)


def dumpAST(obj, ind=0, topnode=False):
    """Print out a block/entire AST."""
    indChar = ("\t" * ind) + "-> " if ind else ""
    print(indChar + "[" + obj.t + "]")
    if not obj.title == "":
        print("\t" + indChar + "Title: " + (obj.title or ''))
    if not obj.info == "":
        print("\t" + indChar + "Info: " + (obj.info or ''))
    if not obj.destination == "":
        print("\t" + indChar + "Destination: " + (obj.destination or ''))
    if obj.is_open:
        print("\t" + indChar + "Open: " + str(obj.is_open))
    if obj.last_line_blank:
        print(
            "\t" + indChar + "Last line blank: " + str(obj.last_line_blank))
    if obj.sourcepos:
        print("\t" + indChar + "Sourcepos: " + str(obj.sourcepos))
    if not obj.string_content == "":
        print("\t" + indChar + "String content: " + (obj.string_content or ''))
    if not obj.info == "":
        print("\t" + indChar + "Info: " + (obj.info or ''))
    if not obj.literal == "":
        print("\t" + indChar + "Literal: " + (obj.literal or ''))
    if obj.list_data.get('type'):
        print("\t" + indChar + "List Data: ")
        print("\t\t" + indChar + "[type] = " + obj.list_data.get('type'))
        if obj.list_data.get('bullet_char'):
            print(
                "\t\t" + indChar + "[bullet_char] = " +
                obj.list_data['bullet_char'])
        if obj.list_data.get('start'):
            print(
                "\t\t" + indChar + "[start] = " +
                str(obj.list_data.get('start')))
        if obj.list_data.get('delimiter'):
            print(
                "\t\t" + indChar + "[delimiter] = " +
                obj.list_data.get('delimiter'))
        if obj.list_data.get('padding'):
            print(
                "\t\t" + indChar + "[padding] = " +
                str(obj.list_data.get('padding')))
        if obj.list_data.get('marker_offset'):
            print(
                "\t\t" + indChar + "[marker_offset] = " +
                str(obj.list_data.get('marker_offset')))
    if obj.walker:
        print("\t" + indChar + "Children:")
        walker = obj.walker()
        nxt = walker.nxt()
        while nxt is not None and topnode is False:
            dumpAST(nxt['node'], ind + 2, topnode=True)
            nxt = walker.nxt()
2353  .venv/lib/python3.8/site-packages/commonmark/entitytrans.py  Normal file
File diff suppressed because it is too large
882  .venv/lib/python3.8/site-packages/commonmark/inlines.py  Normal file
@@ -0,0 +1,882 @@
|
|||
from __future__ import absolute_import, unicode_literals, division
|
||||
|
||||
import re
|
||||
import sys
|
||||
from commonmark import common
|
||||
from commonmark.common import normalize_uri, unescape_string
|
||||
from commonmark.node import Node
|
||||
from commonmark.normalize_reference import normalize_reference
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
if sys.version_info >= (3, 4):
|
||||
import html
|
||||
HTMLunescape = html.unescape
|
||||
else:
|
||||
from .entitytrans import _unescape
|
||||
HTMLunescape = _unescape
|
||||
else:
|
||||
from commonmark import entitytrans
|
||||
HTMLunescape = entitytrans._unescape
|
||||
|
||||
# Some regexps used in inline parser:
|
||||
|
||||
ESCAPED_CHAR = '\\\\' + common.ESCAPABLE
|
||||
|
||||
rePunctuation = re.compile(
|
||||
r'[!"#$%&\'()*+,\-./:;<=>?@\[\]\\^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB'
|
||||
r'\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3'
|
||||
r'\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F'
|
||||
r'\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E'
|
||||
r'\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12'
|
||||
r'\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB'
|
||||
r'\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736'
|
||||
r'\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-'
|
||||
r'\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F'
|
||||
r'\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E'
|
||||
r'\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5'
|
||||
r'\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC'
|
||||
r'\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011'
|
||||
r'\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673'
|
||||
r'\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E'
|
||||
r'\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0'
|
||||
r'\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63'
|
||||
r'\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B'
|
||||
r'\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-'
|
||||
r'\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58'
|
||||
r'\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D'
|
||||
r'\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD'
|
||||
r'\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7'
|
||||
r'\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F'
|
||||
r'\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]'
|
||||
)
|
||||
|
||||
reLinkTitle = re.compile(
|
||||
'^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' +
|
||||
'|' +
|
||||
'\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' +
|
||||
'|' +
|
||||
'\\((' + ESCAPED_CHAR + '|[^()\\x00])*\\))')
|
||||
reLinkDestinationBraces = re.compile(r'^(?:<(?:[^<>\n\\\x00]|\\.)*>)')
|
||||
|
||||
reEscapable = re.compile('^' + common.ESCAPABLE)
|
||||
reEntityHere = re.compile('^' + common.ENTITY, re.IGNORECASE)
|
||||
reTicks = re.compile(r'`+')
|
||||
reTicksHere = re.compile(r'^`+')
|
||||
reEllipses = re.compile(r'\.\.\.')
|
||||
reDash = re.compile(r'--+')
|
||||
reEmailAutolink = re.compile(
|
||||
r"^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9]"
|
||||
r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
|
||||
r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>")
|
||||
reAutolink = re.compile(
|
||||
r'^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>',
|
||||
re.IGNORECASE)
|
||||
reSpnl = re.compile(r'^ *(?:\n *)?')
|
||||
reWhitespaceChar = re.compile(r'^^[ \t\n\x0b\x0c\x0d]')
|
||||
reWhitespace = re.compile(r'[ \t\n\x0b\x0c\x0d]+')
|
||||
reUnicodeWhitespaceChar = re.compile(r'^\s')
|
||||
reFinalSpace = re.compile(r' *$')
|
||||
reInitialSpace = re.compile(r'^ *')
|
||||
reSpaceAtEndOfLine = re.compile(r'^ *(?:\n|$)')
|
||||
reLinkLabel = re.compile(r'^\[(?:[^\\\[\]]|\\.){0,1000}\]')
|
||||
# Matches a string of non-special characters.
|
||||
reMain = re.compile(r'^[^\n`\[\]\\!<&*_\'"]+', re.MULTILINE)
|
||||
|
||||
|
||||
def text(s):
|
||||
node = Node('text', None)
|
||||
node.literal = s
|
||||
return node
|
||||
|
||||
|
||||
def smart_dashes(chars):
|
||||
en_count = 0
|
||||
em_count = 0
|
||||
if len(chars) % 3 == 0:
|
||||
# If divisible by 3, use all em dashes
|
||||
em_count = len(chars) // 3
|
||||
elif len(chars) % 2 == 0:
|
||||
# If divisble by 2, use all en dashes
|
||||
en_count = len(chars) // 2
|
||||
elif len(chars) % 3 == 2:
|
||||
# if 2 extra dashes, use en dashfor last 2;
|
||||
# em dashes for rest
|
||||
en_count = 1
|
||||
em_count = (len(chars) - 2) // 3
|
||||
else:
|
||||
# Use en dashes for last 4 hyphens; em dashes for rest
|
||||
en_count = 2
|
||||
em_count = (len(chars) - 4) // 3
|
||||
return ('\u2014' * em_count) + ('\u2013' * en_count)
|
||||
|
||||
|
||||
class InlineParser(object):
|
||||
"""INLINE PARSER
|
||||
|
||||
These are methods of an InlineParser class, defined below.
|
||||
An InlineParser keeps track of a subject (a string to be
|
||||
parsed) and a position in that subject.
|
||||
"""
|
||||
|
||||
def __init__(self, options={}):
|
||||
self.subject = ''
|
||||
self.brackets = None
|
||||
self.pos = 0
|
||||
self.refmap = {}
|
||||
self.options = options
|
||||
|
||||
def match(self, regexString):
|
||||
"""
|
||||
If regexString matches at current position in the subject, advance
|
||||
position in subject and return the match; otherwise return None.
|
||||
"""
|
||||
match = re.search(regexString, self.subject[self.pos:])
|
||||
if match is None:
|
||||
return None
|
||||
else:
|
||||
self.pos += match.end()
|
||||
return match.group()
|
||||
|
||||
def peek(self):
|
||||
""" Returns the character at the current subject position, or None if
|
||||
there are no more characters."""
|
||||
if self.pos < len(self.subject):
|
||||
return self.subject[self.pos]
|
||||
else:
|
||||
return None
|
||||
|
||||
def spnl(self):
|
||||
""" Parse zero or more space characters, including at
|
||||
most one newline."""
|
||||
self.match(reSpnl)
|
||||
return True
|
||||
|
||||
# All of the parsers below try to match something at the current position
|
||||
# in the subject. If they succeed in matching anything, they
|
||||
# push an inline matched, advancing the subject.
|
||||
|
||||
def parseBackticks(self, block):
|
||||
""" Attempt to parse backticks, adding either a backtick code span or a
|
||||
literal sequence of backticks to the 'inlines' list."""
|
||||
ticks = self.match(reTicksHere)
|
||||
if ticks is None:
|
||||
return False
|
||||
after_open_ticks = self.pos
|
||||
matched = self.match(reTicks)
|
||||
while matched is not None:
|
||||
if matched == ticks:
|
||||
node = Node('code', None)
|
||||
contents = self.subject[after_open_ticks:self.pos-len(ticks)] \
|
||||
.replace('\n', ' ')
|
||||
if contents.lstrip(' ') and contents[0] == contents[-1] == ' ':
|
||||
node.literal = contents[1:-1]
|
||||
else:
|
||||
node.literal = contents
|
||||
block.append_child(node)
|
||||
return True
|
||||
matched = self.match(reTicks)
|
||||
# If we got here, we didn't match a closing backtick sequence.
|
||||
self.pos = after_open_ticks
|
||||
block.append_child(text(ticks))
|
||||
return True
|
||||
|
||||
def parseBackslash(self, block):
|
||||
"""
|
||||
Parse a backslash-escaped special character, adding either the
|
||||
escaped character, a hard line break (if the backslash is followed
|
||||
by a newline), or a literal backslash to the block's children.
|
||||
Assumes current character is a backslash.
|
||||
"""
|
||||
subj = self.subject
|
||||
self.pos += 1
|
||||
|
||||
try:
|
||||
subjchar = subj[self.pos]
|
||||
except IndexError:
|
||||
subjchar = None
|
||||
|
||||
if self.peek() == '\n':
|
||||
self.pos += 1
|
||||
node = Node('linebreak', None)
|
||||
block.append_child(node)
|
||||
elif subjchar and re.search(reEscapable, subjchar):
|
||||
block.append_child(text(subjchar))
|
||||
self.pos += 1
|
||||
else:
|
||||
block.append_child(text('\\'))
|
||||
|
||||
return True
|
||||
|
||||
def parseAutolink(self, block):
|
||||
"""Attempt to parse an autolink (URL or email in pointy brackets)."""
|
||||
m = self.match(reEmailAutolink)
|
||||
|
||||
if m:
|
||||
# email
|
||||
dest = m[1:-1]
|
||||
node = Node('link', None)
|
||||
node.destination = normalize_uri('mailto:' + dest)
|
||||
node.title = ''
|
||||
node.append_child(text(dest))
|
||||
block.append_child(node)
|
||||
return True
|
||||
else:
|
||||
m = self.match(reAutolink)
|
||||
if m:
|
||||
# link
|
||||
dest = m[1:-1]
|
||||
node = Node('link', None)
|
||||
node.destination = normalize_uri(dest)
|
||||
node.title = ''
|
||||
node.append_child(text(dest))
|
||||
block.append_child(node)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def parseHtmlTag(self, block):
|
||||
"""Attempt to parse a raw HTML tag."""
|
||||
m = self.match(common.reHtmlTag)
|
||||
if m is None:
|
||||
return False
|
||||
else:
|
||||
node = Node('html_inline', None)
|
||||
node.literal = m
|
||||
block.append_child(node)
|
||||
return True
|
||||
|
||||
def scanDelims(self, c):
|
||||
"""
|
||||
Scan a sequence of characters == c, and return information about
|
||||
the number of delimiters and whether they are positioned such that
|
||||
they can open and/or close emphasis or strong emphasis. A utility
|
||||
function for strong/emph parsing.
|
||||
"""
|
||||
numdelims = 0
|
||||
startpos = self.pos
|
||||
|
||||
if c == "'" or c == '"':
|
||||
numdelims += 1
|
||||
self.pos += 1
|
||||
else:
|
||||
while (self.peek() == c):
|
||||
numdelims += 1
|
||||
self.pos += 1
|
||||
|
||||
if numdelims == 0:
|
||||
return None
|
||||
|
||||
c_before = '\n' if startpos == 0 else self.subject[startpos - 1]
|
||||
|
||||
c_after = self.peek()
|
||||
if c_after is None:
|
||||
c_after = '\n'
|
||||
|
||||
# Python 2 doesn't recognize '\xa0' as whitespace
|
||||
after_is_whitespace = re.search(reUnicodeWhitespaceChar, c_after) or \
|
||||
c_after == '\xa0'
|
||||
after_is_punctuation = re.search(rePunctuation, c_after)
|
||||
before_is_whitespace = re.search(
|
||||
reUnicodeWhitespaceChar, c_before) or \
|
||||
c_before == '\xa0'
|
||||
before_is_punctuation = re.search(rePunctuation, c_before)
|
||||
|
||||
left_flanking = not after_is_whitespace and \
|
||||
(not after_is_punctuation or
|
||||
before_is_whitespace or
|
||||
before_is_punctuation)
|
||||
right_flanking = not before_is_whitespace and \
|
||||
(not before_is_punctuation or
|
||||
after_is_whitespace or
|
||||
after_is_punctuation)
|
||||
if c == '_':
|
||||
can_open = left_flanking and \
|
||||
(not right_flanking or before_is_punctuation)
|
||||
can_close = right_flanking and \
|
||||
(not left_flanking or after_is_punctuation)
|
||||
elif c == "'" or c == '"':
|
||||
can_open = left_flanking and not right_flanking
|
||||
can_close = right_flanking
|
||||
else:
|
||||
can_open = left_flanking
|
||||
can_close = right_flanking
|
||||
|
||||
self.pos = startpos
|
||||
return {
|
||||
'numdelims': numdelims,
|
||||
'can_open': can_open,
|
||||
'can_close': can_close,
|
||||
}
|
||||
|
||||
def handleDelim(self, cc, block):
|
||||
"""Handle a delimiter marker for emphasis or a quote."""
|
||||
res = self.scanDelims(cc)
|
||||
if not res:
|
||||
return False
|
||||
numdelims = res.get('numdelims')
|
||||
startpos = self.pos
|
||||
|
||||
self.pos += numdelims
|
||||
if cc == "'":
|
||||
contents = '\u2019'
|
||||
elif cc == '"':
|
||||
contents = '\u201C'
|
||||
else:
|
||||
contents = self.subject[startpos:self.pos]
|
||||
node = text(contents)
|
||||
block.append_child(node)
|
||||
|
||||
# Add entry to stack for this opener
|
||||
self.delimiters = {
|
||||
'cc': cc,
|
||||
'numdelims': numdelims,
|
||||
'origdelims': numdelims,
|
||||
'node': node,
|
||||
'previous': self.delimiters,
|
||||
'next': None,
|
||||
'can_open': res.get('can_open'),
|
||||
'can_close': res.get('can_close'),
|
||||
}
|
||||
if self.delimiters['previous'] is not None:
|
||||
self.delimiters['previous']['next'] = self.delimiters
|
||||
return True
|
||||
|
||||
def removeDelimiter(self, delim):
|
||||
if delim.get('previous') is not None:
|
||||
delim['previous']['next'] = delim.get('next')
|
||||
if delim.get('next') is None:
|
||||
# Top of stack
|
||||
self.delimiters = delim.get('previous')
|
||||
else:
|
||||
delim['next']['previous'] = delim.get('previous')
|
||||
|
||||
@staticmethod
|
||||
def removeDelimitersBetween(bottom, top):
|
||||
if bottom.get('next') != top:
|
||||
bottom['next'] = top
|
||||
top['previous'] = bottom
|
||||
|
||||
def processEmphasis(self, stack_bottom):
|
||||
openers_bottom = {
|
||||
'_': stack_bottom,
|
||||
'*': stack_bottom,
|
||||
"'": stack_bottom,
|
||||
'"': stack_bottom,
|
||||
}
|
||||
odd_match = False
|
||||
use_delims = 0
|
||||
|
||||
# Find first closer above stack_bottom
|
||||
closer = self.delimiters
|
||||
while closer is not None and closer.get('previous') != stack_bottom:
|
||||
closer = closer.get('previous')
|
||||
|
||||
# Move forward, looking for closers, and handling each
|
||||
while closer is not None:
|
||||
if not closer.get('can_close'):
|
||||
closer = closer.get('next')
|
||||
else:
|
||||
# found emphasis closer. now look back for first
|
||||
# matching opener:
|
||||
opener = closer.get('previous')
|
||||
opener_found = False
|
||||
closercc = closer.get('cc')
|
||||
while (opener is not None and opener != stack_bottom and
|
||||
opener != openers_bottom[closercc]):
|
||||
odd_match = (closer.get('can_open') or
|
||||
opener.get('can_close')) and \
|
||||
closer['origdelims'] % 3 != 0 and \
|
||||
(opener['origdelims'] +
|
||||
closer['origdelims']) % 3 == 0
|
||||
if opener.get('cc') == closercc and \
|
||||
opener.get('can_open') and \
|
||||
not odd_match:
|
||||
opener_found = True
|
||||
break
|
||||
opener = opener.get('previous')
|
||||
old_closer = closer
|
||||
|
||||
if closercc == '*' or closercc == '_':
|
||||
if not opener_found:
|
||||
closer = closer.get('next')
|
||||
else:
|
||||
# Calculate actual number of delimiters used from
|
||||
# closer
|
||||
use_delims = 2 if (
|
||||
closer['numdelims'] >= 2 and
|
||||
opener['numdelims'] >= 2) else 1
|
||||
|
||||
opener_inl = opener.get('node')
|
||||
closer_inl = closer.get('node')
|
||||
|
||||
# Remove used delimiters from stack elts and inlines
|
||||
opener['numdelims'] -= use_delims
|
||||
closer['numdelims'] -= use_delims
|
||||
opener_inl.literal = opener_inl.literal[
|
||||
:len(opener_inl.literal) - use_delims]
|
||||
closer_inl.literal = closer_inl.literal[
|
||||
:len(closer_inl.literal) - use_delims]
|
||||
|
||||
# Build contents for new Emph element
|
||||
if use_delims == 1:
|
||||
emph = Node('emph', None)
|
||||
else:
|
||||
emph = Node('strong', None)
|
||||
|
||||
tmp = opener_inl.nxt
|
||||
while tmp and tmp != closer_inl:
|
||||
nxt = tmp.nxt
|
||||
tmp.unlink()
|
||||
emph.append_child(tmp)
|
||||
tmp = nxt
|
||||
|
||||
opener_inl.insert_after(emph)
|
||||
|
||||
# Remove elts between opener and closer in delimiters
|
||||
# stack
|
||||
self.removeDelimitersBetween(opener, closer)
|
||||
|
||||
# If opener has 0 delims, remove it and the inline
|
||||
if opener['numdelims'] == 0:
|
||||
opener_inl.unlink()
|
||||
self.removeDelimiter(opener)
|
||||
|
||||
if closer['numdelims'] == 0:
|
||||
closer_inl.unlink()
|
||||
tempstack = closer['next']
|
||||
self.removeDelimiter(closer)
|
||||
closer = tempstack
|
||||
|
||||
elif closercc == "'":
|
||||
closer['node'].literal = '\u2019'
|
||||
if opener_found:
|
||||
opener['node'].literal = '\u2018'
|
||||
closer = closer['next']
|
||||
|
||||
elif closercc == '"':
|
||||
closer['node'].literal = '\u201D'
|
||||
if opener_found:
|
||||
opener['node'].literal = '\u201C'
|
||||
closer = closer['next']
|
||||
|
||||
if not opener_found and not odd_match:
|
||||
# Set lower bound for future searches for openers:
|
||||
# We don't do this with odd_match because a **
|
||||
# that doesn't match an earlier * might turn into
|
||||
# an opener, and the * might be matched by something
|
||||
# else.
|
||||
openers_bottom[closercc] = old_closer['previous']
|
||||
if not old_closer['can_open']:
|
||||
# We can remove a closer that can't be an opener,
|
||||
# once we've seen there's no matching opener:
|
||||
self.removeDelimiter(old_closer)
|
||||
|
||||
# Remove all delimiters
|
||||
while self.delimiters is not None and self.delimiters != stack_bottom:
|
||||
self.removeDelimiter(self.delimiters)
|
||||
|
||||
def parseLinkTitle(self):
|
||||
"""
|
||||
Attempt to parse link title (sans quotes), returning the string
|
||||
or None if no match.
|
||||
"""
|
||||
title = self.match(reLinkTitle)
|
||||
if title is None:
|
||||
return None
|
||||
else:
|
||||
# chop off quotes from title and unescape:
|
||||
return unescape_string(title[1:-1])
|
||||
|
||||
def parseLinkDestination(self):
|
||||
"""
|
||||
Attempt to parse link destination, returning the string or
|
||||
None if no match.
|
||||
"""
|
||||
res = self.match(reLinkDestinationBraces)
|
||||
if res is None:
|
||||
if self.peek() == '<':
|
||||
return None
|
||||
# TODO handrolled parser; res should be None or the string
|
||||
savepos = self.pos
|
||||
openparens = 0
|
||||
while True:
|
||||
c = self.peek()
|
||||
if c is None:
|
||||
break
|
||||
if c == '\\' and re.search(
|
||||
reEscapable, self.subject[self.pos+1:self.pos+2]):
|
||||
self.pos += 1
|
||||
if self.peek() is not None:
|
||||
self.pos += 1
|
||||
elif c == '(':
|
||||
self.pos += 1
|
||||
openparens += 1
|
||||
elif c == ')':
|
||||
if openparens < 1:
|
||||
break
|
||||
else:
|
||||
self.pos += 1
|
||||
openparens -= 1
|
||||
elif re.search(reWhitespaceChar, c):
|
||||
break
|
||||
else:
|
||||
self.pos += 1
|
||||
if self.pos == savepos and c != ')':
|
||||
return None
|
||||
res = self.subject[savepos:self.pos]
|
||||
return normalize_uri(unescape_string(res))
|
||||
else:
|
||||
# chop off surrounding <..>:
|
||||
return normalize_uri(unescape_string(res[1:-1]))
|
||||
|
||||
def parseLinkLabel(self):
|
||||
"""
|
||||
Attempt to parse a link label, returning number of
|
||||
characters parsed.
|
||||
"""
|
||||
# Note: our regex will allow something of form [..\];
|
||||
# we disallow it here rather than using lookahead in the regex:
|
||||
m = self.match(reLinkLabel)
|
||||
if m is None or len(m) > 1001:
|
||||
return 0
|
||||
else:
|
||||
return len(m)
|
||||
|
||||
def parseOpenBracket(self, block):
|
||||
"""
|
||||
Add open bracket to delimiter stack and add a text node to
|
||||
block's children.
|
||||
"""
|
||||
startpos = self.pos
|
||||
self.pos += 1
|
||||
|
||||
node = text('[')
|
||||
block.append_child(node)
|
||||
|
||||
# Add entry to stack for this opener
|
||||
self.addBracket(node, startpos, False)
|
||||
return True
|
||||
|
||||
def parseBang(self, block):
|
||||
"""
|
||||
If next character is [, add a ! delimiter to the delimiter stack and
|
||||
add a text node to block's children. Otherwise just add a text
|
||||
node.
|
||||
"""
|
||||
startpos = self.pos
|
||||
self.pos += 1
|
||||
if self.peek() == '[':
|
||||
self.pos += 1
|
||||
|
||||
node = text('![')
|
||||
block.append_child(node)
|
||||
|
||||
# Add entry to stack for this opener
|
||||
self.addBracket(node, startpos + 1, True)
|
||||
else:
|
||||
block.append_child(text('!'))
|
||||
|
||||
return True
|
||||
|
||||
def parseCloseBracket(self, block):
|
||||
"""
|
||||
Try to match close bracket against an opening in the delimiter
|
||||
stack. Add either a link or image, or a plain [ character,
|
||||
to block's children. If there is a matching delimiter,
|
||||
remove it from the delimiter stack.
|
||||
"""
|
||||
title = None
|
||||
matched = False
|
||||
self.pos += 1
|
||||
startpos = self.pos
|
||||
|
||||
# get last [ or ![
|
||||
opener = self.brackets
|
||||
|
||||
if opener is None:
|
||||
# no matched opener, just return a literal
|
||||
block.append_child(text(']'))
|
||||
return True
|
||||
|
||||
if not opener.get('active'):
|
||||
# no matched opener, just return a literal
|
||||
block.append_child(text(']'))
|
||||
# take opener off brackets stack
|
||||
self.removeBracket()
|
||||
return True
|
||||
|
||||
# If we got here, opener is a potential opener
|
||||
is_image = opener.get('image')
|
||||
|
||||
# Check to see if we have a link/image
|
||||
|
||||
savepos = self.pos
|
||||
|
||||
# Inline link?
|
||||
if self.peek() == '(':
|
||||
self.pos += 1
|
||||
self.spnl()
|
||||
dest = self.parseLinkDestination()
|
||||
if dest is not None and self.spnl():
|
||||
# make sure there's a space before the title
|
||||
if re.search(reWhitespaceChar, self.subject[self.pos-1]):
|
||||
title = self.parseLinkTitle()
|
||||
if self.spnl() and self.peek() == ')':
|
||||
self.pos += 1
|
||||
matched = True
|
||||
else:
|
||||
self.pos = savepos
|
||||
|
||||
if not matched:
|
||||
# Next, see if there's a link label
|
||||
beforelabel = self.pos
|
||||
n = self.parseLinkLabel()
|
||||
if n > 2:
|
||||
reflabel = self.subject[beforelabel:beforelabel + n]
|
||||
elif not opener.get('bracket_after'):
|
||||
# Empty or missing second label means to use the first
|
||||
# label as the reference. The reference must not
|
||||
# contain a bracket. If we know there's a bracket, we
|
||||
# don't even bother checking it.
|
||||
reflabel = self.subject[opener.get('index'):startpos]
|
||||
if n == 0:
|
||||
# If shortcut reference link, rewind before spaces we skipped.
|
||||
self.pos = savepos
|
||||
|
||||
if reflabel:
|
||||
# lookup rawlabel in refmap
|
||||
link = self.refmap.get(normalize_reference(reflabel))
|
||||
if link:
|
||||
dest = link['destination']
|
||||
title = link['title']
|
||||
matched = True
|
||||
|
||||
if matched:
|
||||
node = Node('image' if is_image else 'link', None)
|
||||
|
||||
node.destination = dest
|
||||
node.title = title or ''
|
||||
tmp = opener.get('node').nxt
|
||||
while tmp:
|
||||
nxt = tmp.nxt
|
||||
tmp.unlink()
|
||||
node.append_child(tmp)
|
||||
tmp = nxt
|
||||
block.append_child(node)
|
||||
self.processEmphasis(opener.get('previousDelimiter'))
|
||||
self.removeBracket()
|
||||
opener.get('node').unlink()
|
||||
|
||||
# We remove this bracket and processEmphasis will remove
|
||||
# later delimiters.
|
||||
# Now, for a link, we also deactivate earlier link openers.
|
||||
# (no links in links)
|
||||
if not is_image:
|
||||
opener = self.brackets
|
||||
while opener is not None:
|
||||
if not opener.get('image'):
|
||||
# deactivate this opener
|
||||
opener['active'] = False
|
||||
opener = opener.get('previous')
|
||||
|
||||
return True
|
||||
else:
|
||||
# no match
|
||||
# remove this opener from stack
|
||||
self.removeBracket()
|
||||
self.pos = startpos
|
||||
block.append_child(text(']'))
|
||||
return True
|
||||
|
||||
def addBracket(self, node, index, image):
|
||||
if self.brackets is not None:
|
||||
self.brackets['bracket_after'] = True
|
||||
|
||||
self.brackets = {
|
||||
'node': node,
|
||||
'previous': self.brackets,
|
||||
'previousDelimiter': self.delimiters,
|
||||
'index': index,
|
||||
'image': image,
|
||||
'active': True,
|
||||
}
|
||||
|
||||
def removeBracket(self):
|
||||
self.brackets = self.brackets.get('previous')
|
||||
|
||||
def parseEntity(self, block):
|
||||
"""Attempt to parse an entity."""
|
||||
m = self.match(reEntityHere)
|
||||
if m:
|
||||
block.append_child(text(HTMLunescape(m)))
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def parseString(self, block):
|
||||
"""
|
||||
Parse a run of ordinary characters, or a single character with
|
||||
a special meaning in markdown, as a plain string.
|
||||
"""
|
||||
m = self.match(reMain)
|
||||
if m:
|
||||
if self.options.get('smart'):
|
||||
s = re.sub(reEllipses, '\u2026', m)
|
||||
s = re.sub(reDash, lambda x: smart_dashes(x.group()), s)
|
||||
block.append_child(text(s))
|
||||
else:
|
||||
block.append_child(text(m))
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def parseNewline(self, block):
|
||||
"""
|
||||
Parse a newline. If it was preceded by two spaces, return a hard
|
||||
line break; otherwise a soft line break.
|
||||
"""
|
||||
# assume we're at a \n
|
||||
self.pos += 1
|
||||
lastc = block.last_child
|
||||
if lastc and lastc.t == 'text' and lastc.literal[-1] == ' ':
|
||||
linebreak = len(lastc.literal) >= 2 and lastc.literal[-2] == ' '
|
||||
lastc.literal = re.sub(reFinalSpace, '', lastc.literal)
|
||||
if linebreak:
|
||||
node = Node('linebreak', None)
|
||||
else:
|
||||
node = Node('softbreak', None)
|
||||
block.append_child(node)
|
||||
else:
|
||||
block.append_child(Node('softbreak', None))
|
||||
|
||||
# gobble leading spaces in next line
|
||||
self.match(reInitialSpace)
|
||||
return True
|
||||
|
||||
def parseReference(self, s, refmap):
|
||||
"""Attempt to parse a link reference, modifying refmap."""
|
||||
self.subject = s
|
||||
self.pos = 0
|
||||
startpos = self.pos
|
||||
|
||||
# label:
|
||||
match_chars = self.parseLinkLabel()
|
||||
if match_chars == 0 or match_chars == 2:
|
||||
return 0
|
||||
else:
|
||||
rawlabel = self.subject[:match_chars]
|
||||
|
||||
# colon:
|
||||
if (self.peek() == ':'):
|
||||
self.pos += 1
|
||||
else:
|
||||
self.pos = startpos
|
||||
return 0
|
||||
|
||||
# link url
|
||||
self.spnl()
|
||||
|
||||
dest = self.parseLinkDestination()
|
||||
if dest is None:
|
||||
self.pos = startpos
|
||||
return 0
|
||||
|
||||
beforetitle = self.pos
|
||||
self.spnl()
|
||||
title = None
|
||||
if self.pos != beforetitle:
|
||||
title = self.parseLinkTitle()
|
||||
if title is None:
|
||||
title = ''
|
||||
# rewind before spaces
|
||||
self.pos = beforetitle
|
||||
|
||||
# make sure we're at line end:
|
||||
at_line_end = True
|
||||
if self.match(reSpaceAtEndOfLine) is None:
|
||||
if title == '':
|
||||
at_line_end = False
|
||||
else:
|
||||
# the potential title we found is not at the line end,
|
||||
# but it could still be a legal link reference if we
|
||||
# discard the title
|
||||
title = ''
|
||||
# rewind before spaces
|
||||
self.pos = beforetitle
|
||||
# and instead check if the link URL is at the line end
|
||||
at_line_end = self.match(reSpaceAtEndOfLine) is not None
|
||||
|
||||
if not at_line_end:
|
||||
self.pos = startpos
|
||||
return 0
|
||||
|
||||
normlabel = normalize_reference(rawlabel)
|
||||
if normlabel == '':
|
||||
# label must contain non-whitespace characters
|
||||
self.pos = startpos
|
||||
return 0
|
||||
|
||||
if not refmap.get(normlabel):
|
||||
refmap[normlabel] = {
|
||||
'destination': dest,
|
||||
'title': title
|
||||
}
|
||||
return (self.pos - startpos)
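    # Illustrative note (not part of the original module): given a definition
    # such as
    #
    #     [example]: http://example.com "Optional Title"
    #
    # the method above consumes the whole line, stores an entry in refmap
    # keyed by the normalized label with the parsed destination and title,
    # and returns the number of characters consumed; parseCloseBracket later
    # resolves reference links and images against that map.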
|
||||
|
||||
def parseInline(self, block):
|
||||
"""
|
||||
Parse the next inline element in subject, advancing subject
|
||||
position.
|
||||
|
||||
On success, add the result to block's children and return True.
|
||||
On failure, return False.
|
||||
"""
|
||||
res = False
|
||||
c = self.peek()
|
||||
if c is None:
|
||||
return False
|
||||
if c == '\n':
|
||||
res = self.parseNewline(block)
|
||||
elif c == '\\':
|
||||
res = self.parseBackslash(block)
|
||||
elif c == '`':
|
||||
res = self.parseBackticks(block)
|
||||
elif c == '*' or c == '_':
|
||||
res = self.handleDelim(c, block)
|
||||
elif c == "'" or c == '"':
|
||||
res = self.options.get('smart') and self.handleDelim(c, block)
|
||||
elif c == '[':
|
||||
res = self.parseOpenBracket(block)
|
||||
elif c == '!':
|
||||
res = self.parseBang(block)
|
||||
elif c == ']':
|
||||
res = self.parseCloseBracket(block)
|
||||
elif c == '<':
|
||||
res = self.parseAutolink(block) or self.parseHtmlTag(block)
|
||||
elif c == '&':
|
||||
res = self.parseEntity(block)
|
||||
else:
|
||||
res = self.parseString(block)
|
||||
|
||||
if not res:
|
||||
self.pos += 1
|
||||
block.append_child(text(c))
|
||||
|
||||
return True
|
||||
|
||||
def parseInlines(self, block):
|
||||
"""
|
||||
Parse string content in block into inline children,
|
||||
using refmap to resolve references.
|
||||
"""
|
||||
self.subject = block.string_content.strip()
|
||||
self.pos = 0
|
||||
self.delimiters = None
|
||||
self.brackets = None
|
||||
while (self.parseInline(block)):
|
||||
pass
|
||||
# allow raw string to be garbage collected
|
||||
block.string_content = None
|
||||
self.processEmphasis(None)
|
||||
|
||||
parse = parseInlines
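A small illustrative sketch of the 'smart' option handled by handleDelim and parseString above; not part of the original module, and it assumes the package is importable as commonmark (the substitutions shown mirror the unit tests further down):

import commonmark

parser = commonmark.Parser(options=dict(smart=True))
renderer = commonmark.HtmlRenderer()
# With 'smart' set, straight quotes become curly quotes, "--" an en dash,
# "---" an em dash and "..." an ellipsis.
print(renderer.render(parser.parse("'Smart' punctuation -- like this...")))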
|
||||
41
.venv/lib/python3.8/site-packages/commonmark/main.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
# 2014 - Bibek Kafle & Roland Shoemaker
|
||||
# 2015-2017 - Nikolas Nyby
|
||||
# Port of @jgm's commonmark.js implementation of the CommonMark spec.
|
||||
|
||||
# Basic usage:
|
||||
#
|
||||
# import commonmark
|
||||
# parser = commonmark.Parser()
|
||||
# renderer = commonmark.HtmlRenderer()
|
||||
# print(renderer.render(parser.parse('Hello *world*')))
|
||||
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
from commonmark.blocks import Parser
|
||||
from commonmark.dump import dumpAST, dumpJSON
|
||||
from commonmark.render.html import HtmlRenderer
|
||||
from commonmark.render.rst import ReStructuredTextRenderer
|
||||
|
||||
|
||||
def commonmark(text, format="html"):
|
||||
"""Render CommonMark into HTML, JSON or AST
|
||||
Optional keyword arguments:
|
||||
format: 'html' (default), 'json', 'ast' or 'rst'
|
||||
|
||||
>>> commonmark("*hello!*")
|
||||
'<p><em>hello!</em></p>\\n'
|
||||
"""
|
||||
parser = Parser()
|
||||
ast = parser.parse(text)
|
||||
if format not in ["html", "json", "ast", "rst"]:
|
||||
raise ValueError("format must be 'html', 'json', 'ast' or 'rst'")
|
||||
if format == "html":
|
||||
renderer = HtmlRenderer()
|
||||
return renderer.render(ast)
|
||||
if format == "json":
|
||||
return dumpJSON(ast)
|
||||
if format == "ast":
|
||||
return dumpAST(ast)
|
||||
if format == "rst":
|
||||
renderer = ReStructuredTextRenderer()
|
||||
return renderer.render(ast)
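A brief usage sketch of the helper above; illustrative only, assuming the package is importable as commonmark (as in its own doctest):

import commonmark

print(commonmark.commonmark('Hello **world**'))                # HTML, the default
print(commonmark.commonmark('Hello **world**', format='rst'))  # reStructuredText
ast_json = commonmark.commonmark('Hello **world**', format='json')  # JSON dump of the AST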
|
||||
179
.venv/lib/python3.8/site-packages/commonmark/node.py
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
|
||||
reContainer = re.compile(
|
||||
r'(document|block_quote|list|item|paragraph|'
|
||||
r'heading|emph|strong|link|image|'
|
||||
r'custom_inline|custom_block)')
|
||||
|
||||
|
||||
def is_container(node):
|
||||
return (re.search(reContainer, node.t) is not None)
|
||||
|
||||
|
||||
class NodeWalker(object):
|
||||
|
||||
def __init__(self, root):
|
||||
self.current = root
|
||||
self.root = root
|
||||
self.entering = True
|
||||
|
||||
def __next__(self):
|
||||
cur = self.current
|
||||
entering = self.entering
|
||||
|
||||
if cur is None:
|
||||
raise StopIteration
|
||||
|
||||
container = is_container(cur)
|
||||
|
||||
if entering and container:
|
||||
if cur.first_child:
|
||||
self.current = cur.first_child
|
||||
self.entering = True
|
||||
else:
|
||||
# stay on node but exit
|
||||
self.entering = False
|
||||
elif cur == self.root:
|
||||
self.current = None
|
||||
elif cur.nxt is None:
|
||||
self.current = cur.parent
|
||||
self.entering = False
|
||||
else:
|
||||
self.current = cur.nxt
|
||||
self.entering = True
|
||||
|
||||
return cur, entering
|
||||
|
||||
next = __next__
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def nxt(self):
|
||||
""" for backwards compatibility """
|
||||
try:
|
||||
cur, entering = next(self)
|
||||
return {
|
||||
'entering': entering,
|
||||
'node': cur,
|
||||
}
|
||||
except StopIteration:
|
||||
return None
|
||||
|
||||
def resume_at(self, node, entering):
|
||||
self.current = node
|
||||
self.entering = (entering is True)
|
||||
|
||||
|
||||
class Node(object):
|
||||
def __init__(self, node_type, sourcepos):
|
||||
self.t = node_type
|
||||
self.parent = None
|
||||
self.first_child = None
|
||||
self.last_child = None
|
||||
self.prv = None
|
||||
self.nxt = None
|
||||
self.sourcepos = sourcepos
|
||||
self.last_line_blank = False
|
||||
self.last_line_checked = False
|
||||
self.is_open = True
|
||||
self.string_content = ''
|
||||
self.literal = None
|
||||
self.list_data = {}
|
||||
self.info = None
|
||||
self.destination = None
|
||||
self.title = None
|
||||
self.is_fenced = False
|
||||
self.fence_char = None
|
||||
self.fence_length = 0
|
||||
self.fence_offset = None
|
||||
self.level = None
|
||||
self.on_enter = None
|
||||
self.on_exit = None
|
||||
|
||||
def __repr__(self):
|
||||
return "Node {} [{}]".format(self.t, self.literal)
|
||||
|
||||
def pretty(self):
|
||||
from pprint import pprint
|
||||
pprint(self.__dict__)
|
||||
|
||||
def normalize(self):
|
||||
prev = None
|
||||
for curr, _ in self.walker():
|
||||
if prev is None:
|
||||
prev = curr
|
||||
continue
|
||||
if prev.t == 'text' and curr.t == 'text':
|
||||
prev.literal += curr.literal
|
||||
curr.unlink()
|
||||
else:
|
||||
prev = curr
|
||||
|
||||
def is_container(self):
|
||||
return is_container(self)
|
||||
|
||||
def append_child(self, child):
|
||||
child.unlink()
|
||||
child.parent = self
|
||||
if self.last_child:
|
||||
self.last_child.nxt = child
|
||||
child.prv = self.last_child
|
||||
self.last_child = child
|
||||
else:
|
||||
self.first_child = child
|
||||
self.last_child = child
|
||||
|
||||
def prepend_child(self, child):
|
||||
child.unlink()
|
||||
child.parent = self
|
||||
if self.first_child:
|
||||
self.first_child.prv = child
|
||||
child.nxt = self.first_child
|
||||
self.first_child = child
|
||||
else:
|
||||
self.first_child = child
|
||||
self.last_child = child
|
||||
|
||||
def unlink(self):
|
||||
if self.prv:
|
||||
self.prv.nxt = self.nxt
|
||||
elif self.parent:
|
||||
self.parent.first_child = self.nxt
|
||||
|
||||
if self.nxt:
|
||||
self.nxt.prv = self.prv
|
||||
elif self.parent:
|
||||
self.parent.last_child = self.prv
|
||||
|
||||
self.parent = None
|
||||
self.nxt = None
|
||||
self.prv = None
|
||||
|
||||
def insert_after(self, sibling):
|
||||
sibling.unlink()
|
||||
sibling.nxt = self.nxt
|
||||
if sibling.nxt:
|
||||
sibling.nxt.prv = sibling
|
||||
sibling.prv = self
|
||||
self.nxt = sibling
|
||||
sibling.parent = self.parent
|
||||
if not sibling.nxt:
|
||||
sibling.parent.last_child = sibling
|
||||
|
||||
def insert_before(self, sibling):
|
||||
sibling.unlink()
|
||||
sibling.prv = self.prv
|
||||
if sibling.prv:
|
||||
sibling.prv.nxt = sibling
|
||||
sibling.nxt = self
|
||||
self.prv = sibling
|
||||
sibling.parent = self.parent
|
||||
if not sibling.prv:
|
||||
sibling.parent.first_child = sibling
|
||||
|
||||
def walker(self):
|
||||
return NodeWalker(self)
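A minimal sketch of walking an AST with the NodeWalker above; illustrative only, assuming the block parser from commonmark.blocks:

from commonmark.blocks import Parser

ast = Parser().parse('Hello *world*')
for node, entering in ast.walker():
    # Container nodes are yielded twice (entering and leaving), leaf nodes once.
    if entering:
        print(node.t, repr(node.literal))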
|
||||
|
|
@ -0,0 +1,165 @@
|
|||
"""Case-folding and whitespace normalization"""
|
||||
# Unicode Case Folding table has been derived from the following work:
|
||||
#
|
||||
# CaseFolding-12.0.0.txt
|
||||
# Date: 2019-01-22, 08:18:22 GMT
|
||||
# (c) 2019 Unicode(R) Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks
|
||||
# of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
import re
|
||||
import sys
|
||||
from builtins import str, chr
|
||||
|
||||
__all__ = ["normalize_reference"]
|
||||
|
||||
if sys.version_info < (3,) and sys.maxunicode <= 0xffff:
|
||||
# shim for Python 2.x UCS2 build
|
||||
_unichr = chr
|
||||
|
||||
def chr(cdp):
|
||||
if 0x10000 <= cdp < 0x110000:
|
||||
cdp -= 0x10000
|
||||
return (_unichr(0xd800 | (cdp >> 10)) +
|
||||
_unichr(0xdc00 | (cdp & 0x3ff)))
|
||||
return _unichr(cdp)
|
||||
|
||||
|
||||
def _parse_table(tbl):
|
||||
xlat = {}
|
||||
cur_i, cur_j = -1, 0
|
||||
for entry in tbl.split(';'):
|
||||
arr = entry.split(',')
|
||||
info = [int(x, 36) if x else 0 for x in arr[0].split(':')]
|
||||
arr = [int(x, 36) for x in arr[1:]]
|
||||
assert not any(x in xlat for x in arr)
|
||||
sfx = ''.join(map(chr, arr))
|
||||
streak, stride = 0, 1
|
||||
if len(info) == 2:
|
||||
fdt, delta = info
|
||||
elif len(info) == 3:
|
||||
fdt, streak, delta = info
|
||||
else:
|
||||
fdt, streak, delta, stride = info
|
||||
assert streak >= 0 and stride >= 1
|
||||
cur_i += fdt + 1
|
||||
cur_j -= delta
|
||||
assert cur_j != 0
|
||||
i = cur_i
|
||||
last = cur_i + streak
|
||||
while i <= last:
|
||||
# uniqueness and idempotency
|
||||
assert i not in xlat and i + cur_j not in xlat
|
||||
assert i not in arr
|
||||
xlat[i] = chr(i + cur_j) + sfx
|
||||
i += stride
|
||||
return xlat
|
||||
|
||||
|
||||
XLAT = _parse_table(
|
||||
# ===== Start of Unicode Case Folding table =====
|
||||
'1t:p:-w;37:-kn;a:m:kn;n:6:;6:3w,37;w:1a:-31:2;1b:5k,lj;1:4:-5k:2;6:e::'
|
||||
'2;f:-aa,32;:18:aa:2;19:3e;:4:-3e:2;5:7h;1:-da;:2:5t:2;3:-5p;:5p;1:1:-5'
|
||||
'o;1:5o;2:-26;:-3f;:-1;:5m;1:-5o;:-2;1:-4;:2;:5s;3:-5u;:-2;1:-1;:4:5x:2'
|
||||
';5:-61;:61;1:-61;2:61;1:-61;:61;1:1:-60;1:2:60:2;3:-62;:4:62:4;b:-1;:1'
|
||||
';1:-1;:1;1:-1;:g:1:2;i:g::2;h:av,lo;:-aw;:2:1:2;3:2q;:-15;:12:-1l:2;13'
|
||||
':3n;1:g:-3n:2;n:-8bu;:8bu;1:4k;:-8gb;2:8br;1:5g;:-7c;:-2;:8:1y:2;72:-3'
|
||||
'7;16:2:37:2;5:;8:-37;6:26;1:2:1;3:-r;1:1:1;1:m,lk,ld;:g:9;h:8:;c:b,lk,'
|
||||
'ld;h:k;c:-7;:12;:-5;3:-a;:7;1:m:-n:2;n:1j;:-6;2:c;:4;1:-1t;1:8;:-8;2:2'
|
||||
':3n;2:f:-5u;f:v:1c;27:w:v:2;15:1g::2;1h:-e;:c:e:2;e:2m::2;2o:11:-1b;2d'
|
||||
':2a,136;26w:11:-5mq;12:6::6;mo:5:5m0;1on:4sm;:-1;:-9;:1:-2;1:1;:-7;:-o'
|
||||
';:-vzb;7:16:tj7;18:2:;8y:44:-2bl:2;45:5yn,mp;:-b,lk;:-2,lm;:-1,lm;:p,j'
|
||||
'i;:-5xb;2:5wx,37;1:2m:-5yk:2;2v:7:9;f:5:;f:7:;f:7:;f:5:;7:5fn,lv;1:2,l'
|
||||
'v,lc;1:2,lv,ld;1:2,lv,n6;2:6:-5ft:2;e:7:;n:7:3c,qh;7:7:8,qh;7:7:-o,qh;'
|
||||
'7:7:8,qh;7:7:-1k,qh;7:7:8,qh;9:-6,qh;:5hc,qh;:6,qh;1:-3,n6;:1,n6,qh;:1'
|
||||
':-5j2;1:1:1u;1:5hd,qh;1:-6;3:-5h3,qh;:5ha,qh;:a,qh;1:-7,n6;:1,n6,qh;:3'
|
||||
':-5h6;3:5hb,qh;5:4,lk,lc;:1,lk,ld;2:3,n6;:1,lk,n6;:1:-5jq;1:1:2k;7:5h5'
|
||||
',lk,lc;:1,lk,ld;:5,lv;1:-2,n6;:1,lk,n6;:1:-5ju;1:1:2w;1:-2x;5:33,qh;:5'
|
||||
'h0,qh;:-4,qh;1:7,n6;:1,n6,qh;:1:-5gu;1:1:-2;1:5h1,qh;89:8a;3:o2;:-3d;6'
|
||||
':-6ea;19:f:c;y:f;mq:p:-p;1ft:1a:-m;2n:1b;1:8ag;:-5ch;:5c1;2:4:-8a0:2;5'
|
||||
':8bh;:-v;:y;:-1;1:3:-8bj:3;b:1:8cg;1:2q:-8cg:2;2y:2::2;6:nym::nym;nyn:'
|
||||
'16::2;1p:q::2;4h:c::2;f:1o::2;1y:2::2;3:r9h;:8:-r9h:2;c:;1:wmh;2:2:-wm'
|
||||
'h:2;5:i::2;j:wn9;:b;:-4;:-a;:3;1:-1e;:o;:-l;:-xbp;:a:pr:2;d:;1:1d;:wlv'
|
||||
';:-5cb;q1:27:2oo;fpr:jii,2u;:1,2x;:1,30;:1,2u,2x;:1,2u,30;:-c,38;:1,38'
|
||||
';c:-z8,12u;:1,12d;:1,12j;:-9,12u;:b,12l;sp:p:-1cjn;ym:13:-8;4v:z:;1jj:'
|
||||
'1e:-o;2e7:v:w;gwv:v:;o8v:x:-2'
|
||||
# ===== End of Unicode Case Folding table =====
|
||||
)
|
||||
|
||||
|
||||
def _check_native(tbl):
|
||||
"""
|
||||
Determine if Python's own native implementation
|
||||
subsumes the supplied case folding table
|
||||
"""
|
||||
try:
|
||||
for i in tbl:
|
||||
stv = chr(i)
|
||||
if stv.casefold() == stv:
|
||||
return False
|
||||
except AttributeError:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
# Hoist version check out of function for performance
|
||||
SPACE_RE = re.compile(r'[ \t\r\n]+')
|
||||
if _check_native(XLAT):
|
||||
def normalize_reference(string):
|
||||
"""
|
||||
Normalize reference label: collapse internal whitespace
|
||||
to single space, remove leading/trailing whitespace, case fold.
|
||||
"""
|
||||
return SPACE_RE.sub(' ', string[1:-1].strip()).casefold()
|
||||
elif sys.version_info >= (3,) or sys.maxunicode > 0xffff:
|
||||
def normalize_reference(string):
|
||||
"""
|
||||
Normalize reference label: collapse internal whitespace
|
||||
to single space, remove leading/trailing whitespace, case fold.
|
||||
"""
|
||||
return SPACE_RE.sub(' ', string[1:-1].strip()).translate(XLAT)
|
||||
else:
|
||||
def _get_smp_regex():
|
||||
xls = sorted(x - 0x10000 for x in XLAT if x >= 0x10000)
|
||||
xls.append(-1)
|
||||
fmt, (dsh, opn, pip, cse) = str('\\u%04x'), str('-[|]')
|
||||
rga, srk, erk = [str(r'[ \t\r\n]+')], 0, -2
|
||||
for k in xls:
|
||||
new_hir = (erk ^ k) >> 10 != 0
|
||||
if new_hir or erk + 1 != k:
|
||||
if erk >= 0 and srk != erk:
|
||||
if srk + 1 != erk:
|
||||
rga.append(dsh)
|
||||
rga.append(fmt % (0xdc00 + (erk & 0x3ff)))
|
||||
if new_hir:
|
||||
if erk >= 0:
|
||||
rga.append(cse)
|
||||
if k < 0:
|
||||
break
|
||||
rga.append(pip)
|
||||
rga.append(fmt % (0xd800 + (k >> 10)))
|
||||
rga.append(opn)
|
||||
srk = k
|
||||
rga.append(fmt % (0xdc00 + (srk & 0x3ff)))
|
||||
erk = k
|
||||
return re.compile(str().join(rga))
|
||||
|
||||
def _subst_handler(matchobj):
|
||||
src = matchobj.group(0)
|
||||
hiv = ord(src[0])
|
||||
if hiv < 0xd800:
|
||||
return ' '
|
||||
return XLAT[0x10000 + ((hiv & 0x3ff) << 10) | (ord(src[1]) & 0x3ff)]
|
||||
|
||||
SMP_RE = _get_smp_regex()
|
||||
|
||||
def normalize_reference(string):
|
||||
"""
|
||||
Normalize reference label: collapse internal whitespace
|
||||
to single space, remove leading/trailing whitespace, case fold.
|
||||
"""
|
||||
return SMP_RE.sub(_subst_handler, string[1:-1].strip()).translate(XLAT)
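A quick illustrative check of the behaviour (not part of the module); the callers in inlines.py pass the label with its surrounding brackets still attached:

# Internal whitespace collapses to a single space, outer whitespace is
# stripped, and the label is case folded.
assert normalize_reference('[  Foo\t BAR ]') == 'foo bar'
assert normalize_reference('[Foo Bar]') == normalize_reference('[FOO BAR]')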
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
228
.venv/lib/python3.8/site-packages/commonmark/render/html.py
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
import re
|
||||
from builtins import str
|
||||
from commonmark.common import escape_xml
|
||||
from commonmark.render.renderer import Renderer
|
||||
|
||||
|
||||
reUnsafeProtocol = re.compile(
|
||||
r'^javascript:|vbscript:|file:|data:', re.IGNORECASE)
|
||||
reSafeDataProtocol = re.compile(
|
||||
r'^data:image\/(?:png|gif|jpeg|webp)', re.IGNORECASE)
|
||||
|
||||
|
||||
def potentially_unsafe(url):
|
||||
return re.search(reUnsafeProtocol, url) and \
|
||||
(not re.search(reSafeDataProtocol, url))
|
||||
|
||||
|
||||
class HtmlRenderer(Renderer):
|
||||
def __init__(self, options={}):
|
||||
# by default, soft breaks are rendered as newlines in HTML
|
||||
options['softbreak'] = options.get('softbreak') or '\n'
|
||||
# set to "<br />" to make them hard breaks
|
||||
# set to " " if you want to ignore line wrapping in source
|
||||
|
||||
self.disable_tags = 0
|
||||
self.last_out = '\n'
|
||||
self.options = options
|
||||
|
||||
def escape(self, text):
|
||||
return escape_xml(text)
|
||||
|
||||
def tag(self, name, attrs=None, selfclosing=None):
|
||||
"""Helper function to produce an HTML tag."""
|
||||
if self.disable_tags > 0:
|
||||
return
|
||||
|
||||
self.buf += '<' + name
|
||||
if attrs and len(attrs) > 0:
|
||||
for attrib in attrs:
|
||||
self.buf += ' ' + attrib[0] + '="' + attrib[1] + '"'
|
||||
|
||||
if selfclosing:
|
||||
self.buf += ' /'
|
||||
|
||||
self.buf += '>'
|
||||
self.last_out = '>'
|
||||
|
||||
# Node methods #
|
||||
|
||||
def text(self, node, entering=None):
|
||||
self.out(node.literal)
|
||||
|
||||
def softbreak(self, node=None, entering=None):
|
||||
self.lit(self.options['softbreak'])
|
||||
|
||||
def linebreak(self, node=None, entering=None):
|
||||
self.tag('br', [], True)
|
||||
self.cr()
|
||||
|
||||
def link(self, node, entering):
|
||||
attrs = self.attrs(node)
|
||||
if entering:
|
||||
if not (self.options.get('safe') and
|
||||
potentially_unsafe(node.destination)):
|
||||
attrs.append(['href', self.escape(node.destination)])
|
||||
|
||||
if node.title:
|
||||
attrs.append(['title', self.escape(node.title)])
|
||||
|
||||
self.tag('a', attrs)
|
||||
else:
|
||||
self.tag('/a')
|
||||
|
||||
def image(self, node, entering):
|
||||
if entering:
|
||||
if self.disable_tags == 0:
|
||||
if self.options.get('safe') and \
|
||||
potentially_unsafe(node.destination):
|
||||
self.lit('<img src="" alt="')
|
||||
else:
|
||||
self.lit('<img src="' +
|
||||
self.escape(node.destination) +
|
||||
'" alt="')
|
||||
self.disable_tags += 1
|
||||
else:
|
||||
self.disable_tags -= 1
|
||||
if self.disable_tags == 0:
|
||||
if node.title:
|
||||
self.lit('" title="' + self.escape(node.title))
|
||||
self.lit('" />')
|
||||
|
||||
def emph(self, node, entering):
|
||||
self.tag('em' if entering else '/em')
|
||||
|
||||
def strong(self, node, entering):
|
||||
self.tag('strong' if entering else '/strong')
|
||||
|
||||
def paragraph(self, node, entering):
|
||||
grandparent = node.parent.parent
|
||||
attrs = self.attrs(node)
|
||||
if grandparent is not None and grandparent.t == 'list':
|
||||
if grandparent.list_data['tight']:
|
||||
return
|
||||
|
||||
if entering:
|
||||
self.cr()
|
||||
self.tag('p', attrs)
|
||||
else:
|
||||
self.tag('/p')
|
||||
self.cr()
|
||||
|
||||
def heading(self, node, entering):
|
||||
tagname = 'h' + str(node.level)
|
||||
attrs = self.attrs(node)
|
||||
if entering:
|
||||
self.cr()
|
||||
self.tag(tagname, attrs)
|
||||
else:
|
||||
self.tag('/' + tagname)
|
||||
self.cr()
|
||||
|
||||
def code(self, node, entering):
|
||||
self.tag('code')
|
||||
self.out(node.literal)
|
||||
self.tag('/code')
|
||||
|
||||
def code_block(self, node, entering):
|
||||
info_words = node.info.split() if node.info else []
|
||||
attrs = self.attrs(node)
|
||||
if len(info_words) > 0 and len(info_words[0]) > 0:
|
||||
attrs.append(['class', 'language-' +
|
||||
self.escape(info_words[0])])
|
||||
|
||||
self.cr()
|
||||
self.tag('pre')
|
||||
self.tag('code', attrs)
|
||||
self.out(node.literal)
|
||||
self.tag('/code')
|
||||
self.tag('/pre')
|
||||
self.cr()
|
||||
|
||||
def thematic_break(self, node, entering):
|
||||
attrs = self.attrs(node)
|
||||
self.cr()
|
||||
self.tag('hr', attrs, True)
|
||||
self.cr()
|
||||
|
||||
def block_quote(self, node, entering):
|
||||
attrs = self.attrs(node)
|
||||
if entering:
|
||||
self.cr()
|
||||
self.tag('blockquote', attrs)
|
||||
self.cr()
|
||||
else:
|
||||
self.cr()
|
||||
self.tag('/blockquote')
|
||||
self.cr()
|
||||
|
||||
def list(self, node, entering):
|
||||
tagname = 'ul' if node.list_data['type'] == 'bullet' else 'ol'
|
||||
attrs = self.attrs(node)
|
||||
if entering:
|
||||
start = node.list_data['start']
|
||||
if start is not None and start != 1:
|
||||
attrs.append(['start', str(start)])
|
||||
|
||||
self.cr()
|
||||
self.tag(tagname, attrs)
|
||||
self.cr()
|
||||
else:
|
||||
self.cr()
|
||||
self.tag('/' + tagname)
|
||||
self.cr()
|
||||
|
||||
def item(self, node, entering):
|
||||
attrs = self.attrs(node)
|
||||
if entering:
|
||||
self.tag('li', attrs)
|
||||
else:
|
||||
self.tag('/li')
|
||||
self.cr()
|
||||
|
||||
def html_inline(self, node, entering):
|
||||
if self.options.get('safe'):
|
||||
self.lit('<!-- raw HTML omitted -->')
|
||||
else:
|
||||
self.lit(node.literal)
|
||||
|
||||
def html_block(self, node, entering):
|
||||
self.cr()
|
||||
if self.options.get('safe'):
|
||||
self.lit('<!-- raw HTML omitted -->')
|
||||
else:
|
||||
self.lit(node.literal)
|
||||
self.cr()
|
||||
|
||||
def custom_inline(self, node, entering):
|
||||
if entering and node.on_enter:
|
||||
self.lit(node.on_enter)
|
||||
elif (not entering) and node.on_exit:
|
||||
self.lit(node.on_exit)
|
||||
|
||||
def custom_block(self, node, entering):
|
||||
self.cr()
|
||||
if entering and node.on_enter:
|
||||
self.lit(node.on_enter)
|
||||
elif (not entering) and node.on_exit:
|
||||
self.lit(node.on_exit)
|
||||
self.cr()
|
||||
|
||||
# Helper methods #
|
||||
|
||||
def out(self, s):
|
||||
self.lit(self.escape(s))
|
||||
|
||||
def attrs(self, node):
|
||||
att = []
|
||||
if self.options.get('sourcepos'):
|
||||
pos = node.sourcepos
|
||||
if pos:
|
||||
att.append(['data-sourcepos', str(pos[0][0]) + ':' +
|
||||
str(pos[0][1]) + '-' + str(pos[1][0]) + ':' +
|
||||
str(pos[1][1])])
|
||||
|
||||
return att
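An illustrative way to exercise the options consumed above; a sketch, not part of the module (Parser comes from commonmark.blocks):

from commonmark.blocks import Parser

renderer = HtmlRenderer(options={
    'softbreak': '<br />\n',  # render soft line breaks as hard breaks
    'safe': True,             # omit raw HTML and potentially unsafe destinations
    'sourcepos': True,        # emit data-sourcepos attributes on block-level tags
})
print(renderer.render(Parser().parse('line one\nline two')))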
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
class Renderer(object):
|
||||
def render(self, ast):
|
||||
"""Walks the AST and calls member methods for each Node type.
|
||||
|
||||
@param ast {Node} The root of the abstract syntax tree.
|
||||
"""
|
||||
walker = ast.walker()
|
||||
|
||||
self.buf = ''
|
||||
self.last_out = '\n'
|
||||
|
||||
event = walker.nxt()
|
||||
while event is not None:
|
||||
type_ = event['node'].t
|
||||
if hasattr(self, type_):
|
||||
getattr(self, type_)(event['node'], event['entering'])
|
||||
event = walker.nxt()
|
||||
|
||||
return self.buf
|
||||
|
||||
def lit(self, s):
|
||||
"""Concatenate a literal string to the buffer.
|
||||
|
||||
@param str {String} The string to concatenate.
|
||||
"""
|
||||
self.buf += s
|
||||
self.last_out = s
|
||||
|
||||
def cr(self):
|
||||
if self.last_out != '\n':
|
||||
self.lit('\n')
|
||||
|
||||
def out(self, s):
|
||||
"""Concatenate a string to the buffer possibly escaping the content.
|
||||
|
||||
Concrete renderer implementations should override this method.
|
||||
|
||||
@param str {String} The string to concatenate.
|
||||
"""
|
||||
self.lit(s)
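A minimal sketch of a custom renderer built on the dispatch in render() above; illustrative only, and the method names must match node types such as 'text', 'softbreak' and 'paragraph':

class PlainTextRenderer(Renderer):
    """Keep only the text content, dropping all markup."""

    def text(self, node, entering):
        self.out(node.literal)

    def softbreak(self, node, entering):
        self.lit(' ')

    def paragraph(self, node, entering):
        if not entering:
            self.cr()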
|
||||
159
.venv/lib/python3.8/site-packages/commonmark/render/rst.py
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from commonmark.render.renderer import Renderer
|
||||
|
||||
|
||||
class ReStructuredTextRenderer(Renderer):
|
||||
"""
|
||||
Render reStructuredText from Markdown
|
||||
|
||||
Example:
|
||||
|
||||
.. code:: python
|
||||
|
||||
import commonmark
|
||||
|
||||
parser = commonmark.Parser()
|
||||
ast = parser.parse('Hello `inline code` example')
|
||||
|
||||
renderer = commonmark.ReStructuredTextRenderer()
|
||||
rst = renderer.render(ast)
|
||||
print(rst) # Hello ``inline code`` example
|
||||
"""
|
||||
def __init__(self, indent_char=' '):
|
||||
self.indent_char = indent_char
|
||||
self.indent_length = 0
|
||||
|
||||
def lit(self, s):
|
||||
if s == '\n':
|
||||
indent = '' # Avoid whitespace if we're just adding a newline
|
||||
elif self.last_out != '\n':
|
||||
indent = '' # Don't indent if we're in the middle of a line
|
||||
else:
|
||||
indent = self.indent_char * self.indent_length
|
||||
|
||||
return super(ReStructuredTextRenderer, self).lit(indent + s)
|
||||
|
||||
def cr(self):
|
||||
self.lit('\n')
|
||||
|
||||
def indent_lines(self, literal, indent_length=4):
|
||||
indent = self.indent_char * indent_length
|
||||
new_lines = []
|
||||
|
||||
for line in literal.splitlines():
|
||||
new_lines.append(indent + line)
|
||||
|
||||
return '\n'.join(new_lines)
|
||||
|
||||
# Nodes
|
||||
|
||||
def document(self, node, entering):
|
||||
pass
|
||||
|
||||
def softbreak(self, node, entering):
|
||||
self.cr()
|
||||
|
||||
def linebreak(self, node, entering):
|
||||
self.cr()
|
||||
self.cr()
|
||||
|
||||
def text(self, node, entering):
|
||||
self.out(node.literal)
|
||||
|
||||
def emph(self, node, entering):
|
||||
self.out('*')
|
||||
|
||||
def strong(self, node, entering):
|
||||
self.out('**')
|
||||
|
||||
def paragraph(self, node, entering):
|
||||
if node.parent.t == 'item':
|
||||
pass
|
||||
else:
|
||||
self.cr()
|
||||
|
||||
def link(self, node, entering):
|
||||
if entering:
|
||||
self.out('`')
|
||||
else:
|
||||
self.out(' <%s>`_' % node.destination)
|
||||
|
||||
def image(self, node, entering):
|
||||
directive = '.. image:: ' + node.destination
|
||||
|
||||
if entering:
|
||||
self.out(directive)
|
||||
self.cr()
|
||||
self.indent_length += 4
|
||||
self.out(':alt: ')
|
||||
else:
|
||||
self.indent_length -= 4
|
||||
|
||||
def code(self, node, entering):
|
||||
self.out('``')
|
||||
self.out(node.literal)
|
||||
self.out('``')
|
||||
|
||||
def code_block(self, node, entering):
|
||||
directive = '.. code::'
|
||||
language_name = None
|
||||
|
||||
info_words = node.info.split() if node.info else []
|
||||
if len(info_words) > 0 and len(info_words[0]) > 0:
|
||||
language_name = info_words[0]
|
||||
|
||||
if language_name:
|
||||
directive += ' ' + language_name
|
||||
|
||||
self.cr()
|
||||
self.out(directive)
|
||||
self.cr()
|
||||
self.cr()
|
||||
self.out(self.indent_lines(node.literal))
|
||||
self.cr()
|
||||
|
||||
def list(self, node, entering):
|
||||
if entering:
|
||||
self.cr()
|
||||
|
||||
def item(self, node, entering):
|
||||
tagname = '*' if node.list_data['type'] == 'bullet' else '#.'
|
||||
|
||||
if entering:
|
||||
self.out(tagname + ' ')
|
||||
else:
|
||||
self.cr()
|
||||
|
||||
def block_quote(self, node, entering):
|
||||
if entering:
|
||||
self.indent_length += 4
|
||||
else:
|
||||
self.indent_length -= 4
|
||||
|
||||
def heading(self, node, entering):
|
||||
heading_chars = [
|
||||
'#',
|
||||
'*',
|
||||
'=',
|
||||
'-',
|
||||
'^',
|
||||
'"'
|
||||
]
|
||||
|
||||
try:
|
||||
heading_char = heading_chars[node.level-1]
|
||||
except IndexError:
|
||||
# Default to the last level if we're in too deep
|
||||
heading_char = heading_chars[-1]
|
||||
|
||||
heading_length = len(node.first_child.literal)
|
||||
banner = heading_char * heading_length
|
||||
|
||||
if entering:
|
||||
self.cr()
|
||||
else:
|
||||
self.cr()
|
||||
self.out(banner)
|
||||
self.cr()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
172
.venv/lib/python3.8/site-packages/commonmark/tests/rst_tests.py
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
import unittest
|
||||
|
||||
import commonmark
|
||||
|
||||
|
||||
class TestCommonmark(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.parser = commonmark.Parser()
|
||||
self.renderer = commonmark.ReStructuredTextRenderer()
|
||||
|
||||
def render_rst(self, test_str):
|
||||
ast = self.parser.parse(test_str)
|
||||
rst = self.renderer.render(ast)
|
||||
|
||||
return rst
|
||||
|
||||
def assertEqualRender(self, src_markdown, expected_rst):
|
||||
rendered_rst = self.render_rst(src_markdown)
|
||||
self.assertEqual(rendered_rst, expected_rst)
|
||||
|
||||
def test_strong(self):
|
||||
src_markdown = 'Hello **Strong**'
|
||||
expected_rst = '\nHello **Strong**\n'
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_emphasis(self):
|
||||
src_markdown = 'Hello *Emphasis*'
|
||||
expected_rst = '\nHello *Emphasis*\n'
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_paragraph(self):
|
||||
src_markdown = 'Hello paragraph'
|
||||
expected_rst = '\nHello paragraph\n'
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_link(self):
|
||||
src_markdown = '[Link](http://example.com)'
|
||||
expected_rst = '\n`Link <http://example.com>`_\n'
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_image(self):
|
||||
src_markdown = '![Image](http://placekitten.com/100/100)'
|
||||
expected_rst = """
|
||||
.. image:: http://placekitten.com/100/100
|
||||
:alt: Image
|
||||
"""
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_code(self):
|
||||
src_markdown = 'Test `inline code` with backticks'
|
||||
expected_rst = '\nTest ``inline code`` with backticks\n'
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_code_block(self):
|
||||
src_markdown = """
|
||||
```python
|
||||
# code block
|
||||
print '3 backticks or'
|
||||
print 'indent 4 spaces'
|
||||
```
|
||||
"""
|
||||
expected_rst = """
|
||||
.. code:: python
|
||||
|
||||
# code block
|
||||
print '3 backticks or'
|
||||
print 'indent 4 spaces'
|
||||
"""
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_unordered_list(self):
|
||||
src_markdown = """
|
||||
This is a list:
|
||||
* List item
|
||||
* List item
|
||||
* List item
|
||||
"""
|
||||
expected_rst = """
|
||||
This is a list:
|
||||
|
||||
* List item
|
||||
* List item
|
||||
* List item
|
||||
"""
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_ordered_list(self):
|
||||
src_markdown = """
|
||||
This is an ordered list:
|
||||
1. One
|
||||
2. Two
|
||||
3. Three
|
||||
"""
|
||||
expected_rst = """
|
||||
This is an ordered list:
|
||||
|
||||
#. One
|
||||
#. Two
|
||||
#. Three
|
||||
"""
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_block_quote(self):
|
||||
src_markdown = """
|
||||
Before the blockquote:
|
||||
|
||||
> The blockquote
|
||||
|
||||
After the blockquote
|
||||
"""
|
||||
expected_rst = """
|
||||
Before the blockquote:
|
||||
|
||||
The blockquote
|
||||
|
||||
After the blockquote
|
||||
"""
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_heading(self):
|
||||
src_markdown = '''
|
||||
# Heading 1
|
||||
|
||||
## Heading 2
|
||||
|
||||
### Heading 3
|
||||
|
||||
#### Heading 4
|
||||
|
||||
##### Heading 5
|
||||
|
||||
###### Heading 6
|
||||
'''
|
||||
expected_rst = '''
|
||||
Heading 1
|
||||
#########
|
||||
|
||||
Heading 2
|
||||
*********
|
||||
|
||||
Heading 3
|
||||
=========
|
||||
|
||||
Heading 4
|
||||
---------
|
||||
|
||||
Heading 5
|
||||
^^^^^^^^^
|
||||
|
||||
Heading 6
|
||||
"""""""""
|
||||
'''
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
def test_multiple_paragraphs(self):
|
||||
src_markdown = '''
|
||||
Start of first paragraph that
|
||||
continues on a new line
|
||||
|
||||
This is the second paragraph
|
||||
'''
|
||||
expected_rst = '''
|
||||
Start of first paragraph that
|
||||
continues on a new line
|
||||
|
||||
This is the second paragraph
|
||||
'''
|
||||
self.assertEqualRender(src_markdown, expected_rst)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
@ -0,0 +1,242 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import division, print_function, unicode_literals
|
||||
import re
|
||||
import timeit
|
||||
import codecs
|
||||
import argparse
|
||||
import sys
|
||||
from builtins import str
|
||||
from commonmark.render.html import HtmlRenderer
|
||||
from commonmark.main import Parser, dumpAST
|
||||
|
||||
|
||||
class colors(object):
|
||||
HEADER = '\033[95m'
|
||||
OKBLUE = '\033[94m'
|
||||
OKGREEN = '\033[92m'
|
||||
WARNING = '\033[93m'
|
||||
FAIL = '\033[91m'
|
||||
ENDC = '\033[0m'
|
||||
|
||||
|
||||
def trace_calls(frame, event, arg):
|
||||
co = frame.f_code
|
||||
func_name = co.co_name
|
||||
if func_name == "write":
|
||||
return
|
||||
line_no = frame.f_lineno
|
||||
filename = co.co_filename
|
||||
if event == "call" and not re.match("__", func_name) and \
|
||||
re.search("CommonMark.py", filename) \
|
||||
and func_name != "dumpAST":
|
||||
print("-> " + frame.f_back.f_code.co_name +
|
||||
" at " + str(frame.f_back.f_lineno) +
|
||||
" called " + func_name + " at " + str(line_no) +
|
||||
" in " + filename)
|
||||
return trace_calls
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="script to run the CommonMark specification tests " +
|
||||
"against the CommonMark.py parser")
|
||||
parser.add_argument(
|
||||
'-t',
|
||||
help="Single test to run or comma separated list " +
|
||||
"of tests (-t 10 or -t 10,11,12,13)")
|
||||
parser.add_argument(
|
||||
'-p',
|
||||
action="store_true",
|
||||
help="Print passed test information")
|
||||
parser.add_argument(
|
||||
'-f',
|
||||
action="store_true",
|
||||
help="Print failed tests (during -np...)")
|
||||
parser.add_argument(
|
||||
'-i',
|
||||
action="store_true",
|
||||
help="Interactive Markdown input mode")
|
||||
parser.add_argument(
|
||||
'-d',
|
||||
action="store_true",
|
||||
help="Debug, trace calls")
|
||||
parser.add_argument(
|
||||
'-np',
|
||||
action="store_true",
|
||||
help="Only print section header, tick, or cross")
|
||||
parser.add_argument(
|
||||
'-s',
|
||||
action="store_true",
|
||||
help="Print percent of tests passed by category")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.d:
|
||||
sys.settrace(trace_calls)
|
||||
|
||||
renderer = HtmlRenderer()
|
||||
parser = Parser()
|
||||
|
||||
f = codecs.open("spec.txt", encoding="utf-8")
|
||||
datalist = []
|
||||
for line in f:
|
||||
datalist.append(line)
|
||||
data = "".join(datalist)
|
||||
passed = 0
|
||||
failed = 0
|
||||
catStats = {}
|
||||
examples = []
|
||||
example_number = 0
|
||||
current_section = ""
|
||||
tabChar = '\u2192'
|
||||
spaceChar = '\u2423'
|
||||
nbspChar = '\u00A0'
|
||||
|
||||
def showSpaces(t):
|
||||
t = re.sub("\\t", tabChar, t)
|
||||
t = re.sub(" ", spaceChar, t)
|
||||
t = re.sub(nbspChar, spaceChar, t)
|
||||
return t
|
||||
|
||||
t = re.sub("\r\n", "\n", data)
|
||||
|
||||
tests = re.sub(
|
||||
re.compile("^<!-- END TESTS -->(.|[\n])*", flags=re.M), '', t)
|
||||
testMatch = re.findall(
|
||||
re.compile(
|
||||
r'^`{32} example\n'
|
||||
r'([\s\S]*?)^\.\n([\s\S]*?)'
|
||||
r'^`{32}$'
|
||||
r'|^#{1,6} *(.*)$',
|
||||
re.M),
|
||||
tests)
|
||||
|
||||
for match in testMatch:
|
||||
if not match[2] == "":
|
||||
current_section = match[2]
|
||||
else:
|
||||
example_number += 1
|
||||
examples.append({
|
||||
'markdown': match[0],
|
||||
'html': match[1],
|
||||
'section': current_section,
|
||||
'number': example_number})
|
||||
|
||||
current_section = ""
|
||||
|
||||
startTime = timeit.default_timer()
|
||||
|
||||
if args.i:
|
||||
print(
|
||||
colors.OKGREEN +
|
||||
"(To end input of Markdown block enter 'end' on " +
|
||||
"it's own line, to quit enter 'quit')" +
|
||||
colors.ENDC)
|
||||
while True:
|
||||
s = ""
|
||||
while True:
|
||||
if sys.version_info >= (3, 0):
|
||||
inp = input(colors.OKBLUE + 'Markdown: ' + colors.ENDC)
|
||||
else:
|
||||
inp = raw_input(colors.OKBLUE + 'Markdown: ' + colors.ENDC) # noqa
|
||||
|
||||
if not inp == "end" and inp != "quit":
|
||||
s += inp + "\n"
|
||||
elif inp == "end":
|
||||
s = s[:-1]
|
||||
break
|
||||
elif inp == "quit":
|
||||
print(colors.HEADER+"bye!"+colors.ENDC)
|
||||
exit(0)
|
||||
ast = parser.parse(s)
|
||||
html = renderer.render(ast)
|
||||
print(colors.WARNING+"="*10+"AST====="+colors.ENDC)
|
||||
dumpAST(ast)
|
||||
print(colors.WARNING+"="*10+"HTML===="+colors.ENDC)
|
||||
print(html)
|
||||
|
||||
# some tests?
|
||||
if args.t:
|
||||
tests = args.t.split(",")
|
||||
choice_examples = []
|
||||
for t in tests:
|
||||
if not t == "" and len(examples) > int(t):
|
||||
choice_examples.append(examples[int(t)-1])
|
||||
examples = choice_examples
|
||||
|
||||
# all tests
|
||||
|
||||
for i, example in enumerate(examples): # [0,examples[0]]
|
||||
if not example['section'] == "" and \
|
||||
not current_section == example['section']:
|
||||
print('\n' + colors.HEADER + '[' + example['section'] + ']' +
|
||||
colors.ENDC + ' ', end='')
|
||||
current_section = example['section']
|
||||
catStats.update({current_section: [0, 0, 0]})
|
||||
|
||||
catStats[current_section][2] += 1
|
||||
if args.d:
|
||||
print(colors.HEADER+"[Parsing]"+colors.ENDC)
|
||||
ast = parser.parse(re.sub(tabChar, "\t", example['markdown']))
|
||||
if args.d:
|
||||
print(colors.HEADER+"[Rendering]"+colors.ENDC)
|
||||
actual = renderer.render(ast)
|
||||
if re.sub('\t', tabChar, actual) == example['html']:
|
||||
passed += 1
|
||||
catStats[current_section][0] += 1
|
||||
if not args.f:
|
||||
print(colors.OKGREEN + '✓' + colors.ENDC, end='')
|
||||
if args.d:
|
||||
dumpAST(ast)
|
||||
if args.p or args.d and not args.np:
|
||||
print(
|
||||
colors.OKBLUE +
|
||||
"=== markdown ===============\n" +
|
||||
colors.ENDC + showSpaces(example['markdown']) +
|
||||
colors.OKBLUE +
|
||||
"\n=== expected ===============\n" +
|
||||
colors.ENDC + showSpaces(example['html']) +
|
||||
colors.OKBLUE +
|
||||
"\n=== got ====================\n" +
|
||||
colors.ENDC + showSpaces(actual))
|
||||
else:
|
||||
failed += 1
|
||||
catStats[current_section][1] += 1
|
||||
if args.t:
|
||||
print("Test #" + str(args.t.split(",")[i]), end='')
|
||||
else:
|
||||
print("Test #" + str(i+1), end='')
|
||||
print(' ' + colors.FAIL + "✗" + colors.ENDC)
|
||||
if args.d:
|
||||
dumpAST(ast)
|
||||
if not args.np or args.f:
|
||||
print(
|
||||
colors.WARNING +
|
||||
"=== markdown ===============\n" +
|
||||
colors.ENDC + showSpaces(example['markdown']) +
|
||||
colors.WARNING +
|
||||
"\n=== expected ===============\n" +
|
||||
colors.ENDC + showSpaces(example['html']) +
|
||||
colors.WARNING +
|
||||
"\n=== got ====================\n" +
|
||||
colors.ENDC + showSpaces(actual))
|
||||
|
||||
print('\n' + str(passed) + ' tests passed, ' + str(failed) + ' failed')
|
||||
|
||||
endTime = timeit.default_timer()
|
||||
runTime = endTime - startTime
|
||||
|
||||
if args.s:
|
||||
for i in catStats.keys():
|
||||
per = catStats[i][0]/catStats[i][2]
|
||||
print(colors.HEADER + "[" + i + "]" + colors.ENDC +
|
||||
"\t" + str(per*100) + "% Passed")
|
||||
|
||||
print("runtime: " + str(runTime) + "s")
|
||||
|
||||
if (failed > 0):
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
157
.venv/lib/python3.8/site-packages/commonmark/tests/unit_tests.py
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import unittest
|
||||
|
||||
try:
|
||||
from hypothesis import given, example
|
||||
except ImportError:
|
||||
# Mock out hypothesis stuff for python 2.6
|
||||
def given(a):
|
||||
def func(b):
|
||||
return
|
||||
return func
|
||||
|
||||
example = given
|
||||
|
||||
try:
|
||||
from hypothesis.strategies import text
|
||||
except ImportError:
|
||||
def text():
|
||||
pass
|
||||
|
||||
|
||||
import commonmark
|
||||
from commonmark.blocks import Parser
|
||||
from commonmark.render.html import HtmlRenderer
|
||||
from commonmark.inlines import InlineParser
|
||||
from commonmark.node import NodeWalker, Node
|
||||
|
||||
|
||||
class TestCommonmark(unittest.TestCase):
|
||||
def test_output(self):
|
||||
s = commonmark.commonmark('*hello!*')
|
||||
self.assertEqual(s, '<p><em>hello!</em></p>\n')
|
||||
|
||||
def test_unicode(self):
|
||||
s = commonmark.commonmark('<div>\u2020</div>\n')
|
||||
self.assertEqual(s, '<div>\u2020</div>\n',
|
||||
'Unicode works in an HTML block.')
|
||||
commonmark.commonmark('* unicode: \u2020')
|
||||
commonmark.commonmark('# unicode: \u2020')
|
||||
commonmark.commonmark('```\n# unicode: \u2020\n```')
|
||||
|
||||
def test_null_string_bug(self):
|
||||
s = commonmark.commonmark('> sometext\n>\n\n')
|
||||
self.assertEqual(
|
||||
s,
|
||||
'<blockquote>\n<pre><code>sometext\n</code></pre>'
|
||||
'\n</blockquote>\n')
|
||||
|
||||
def test_normalize_contracts_text_nodes(self):
|
||||
md = '_a'
|
||||
ast = Parser().parse(md)
|
||||
|
||||
def assert_text_literals(text_literals):
|
||||
walker = ast.walker()
|
||||
document, _ = walker.next()
|
||||
self.assertEqual(document.t, 'document')
|
||||
paragraph, _ = walker.next()
|
||||
self.assertEqual(paragraph.t, 'paragraph')
|
||||
for literal in text_literals:
|
||||
text, _ = walker.next()
|
||||
self.assertEqual(text.t, 'text')
|
||||
self.assertEqual(text.literal, literal)
|
||||
paragraph, _ = walker.next()
|
||||
self.assertEqual(paragraph.t, 'paragraph')
|
||||
|
||||
assert_text_literals(['_', 'a'])
|
||||
ast.normalize()
|
||||
# assert text nodes are contracted
|
||||
assert_text_literals(['_a'])
|
||||
ast.normalize()
|
||||
# assert normalize() doesn't alter a normalized ast
|
||||
assert_text_literals(['_a'])
|
||||
|
||||
def test_dumpAST_orderedlist(self):
|
||||
md = '1.'
|
||||
ast = Parser().parse(md)
|
||||
commonmark.dumpAST(ast)
|
||||
|
||||
@given(text())
|
||||
def test_random_text(self, s):
|
||||
commonmark.commonmark(s)
|
||||
|
||||
def test_smart_dashes(self):
|
||||
md = 'a - b -- c --- d ---- e ----- f'
|
||||
EM = '\u2014'
|
||||
EN = '\u2013'
|
||||
expected_html = (
|
||||
'<p>'
|
||||
+ 'a - '
|
||||
+ 'b ' + EN + ' '
|
||||
+ 'c ' + EM + ' '
|
||||
+ 'd ' + EN + EN + ' '
|
||||
+ 'e ' + EM + EN + ' '
|
||||
+ 'f</p>\n')
|
||||
parser = commonmark.Parser(options=dict(smart=True))
|
||||
ast = parser.parse(md)
|
||||
renderer = commonmark.HtmlRenderer()
|
||||
html = renderer.render(ast)
|
||||
self.assertEqual(html, expected_html)
|
||||
|
||||
def test_regex_vulnerability_link_label(self):
|
||||
i = 200
|
||||
while i <= 2000:
|
||||
s = commonmark.commonmark('[' + ('\\' * i) + '\n')
|
||||
self.assertEqual(s, '<p>' + '[' + ('\\' * (i // 2)) + '</p>\n',
|
||||
'[\\\\... %d deep' % (i,))
|
||||
i *= 10
|
||||
|
||||
def test_regex_vulnerability_link_destination(self):
|
||||
i = 200
|
||||
while i <= 2000:
|
||||
s = commonmark.commonmark(('[](' * i) + '\n')
|
||||
self.assertEqual(s, '<p>' + ('[](' * i) + '</p>\n',
|
||||
'[]( %d deep' % (i,))
|
||||
i *= 10
|
||||
|
||||
|
||||
class TestHtmlRenderer(unittest.TestCase):
|
||||
def test_init(self):
|
||||
HtmlRenderer()
|
||||
|
||||
|
||||
class TestInlineParser(unittest.TestCase):
|
||||
def test_init(self):
|
||||
InlineParser()
|
||||
|
||||
|
||||
class TestNode(unittest.TestCase):
|
||||
def test_doc_node(self):
|
||||
Node('document', [[1, 1], [0, 0]])
|
||||
|
||||
|
||||
class TestNodeWalker(unittest.TestCase):
|
||||
def test_node_walker(self):
|
||||
node = Node('document', [[1, 1], [0, 0]])
|
||||
NodeWalker(node)
|
||||
|
||||
def test_node_walker_iter(self):
|
||||
node = Node('document', [[1, 1], [0, 0]])
|
||||
for subnode, entered in node.walker():
|
||||
pass
|
||||
|
||||
|
||||
class TestParser(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.parser = Parser()
|
||||
|
||||
@given(text())
|
||||
@example('')
|
||||
@example('* unicode: \u2020')
|
||||
def test_text(self, s):
|
||||
self.parser.parse(s)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||