init
This commit is contained in:
commit
38355d2442
9083 changed files with 1225834 additions and 0 deletions
908
.venv/lib/python3.8/site-packages/commonmark/blocks.py
Normal file
908
.venv/lib/python3.8/site-packages/commonmark/blocks.py
Normal file
|
|
@ -0,0 +1,908 @@
|
|||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import re
|
||||
from commonmark import common
|
||||
from commonmark.common import unescape_string
|
||||
from commonmark.inlines import InlineParser
|
||||
from commonmark.node import Node
|
||||
|
||||
|
||||
# Indentation width, in columns, at which a line counts as "indented"
# (the parser sets ``indented = indent >= CODE_INDENT``).
CODE_INDENT = 4

# Patterns that open an HTML block, indexed by HTML block type (1-7).
# Index 0 is a placeholder so that list indices equal the type numbers.
reHtmlBlockOpen = [
    re.compile(r'.'),  # dummy for 0
    # 1: <script>, <pre> or <style>
    re.compile(r'^<(?:script|pre|style)(?:\s|>|$)', re.IGNORECASE),
    # 2: HTML comment
    re.compile(r'^<!--'),
    # 3: processing instruction
    re.compile(r'^<[?]'),
    # 4: declaration
    re.compile(r'^<![A-Z]'),
    # 5: CDATA section
    re.compile(r'^<!\[CDATA\['),
    # 6: block-level tag names.  All six heading levels are listed (the
    # CommonMark spec names h1..h6; previously only h1 was matched) and
    # the duplicated ``title`` alternative has been removed.
    re.compile(
        r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|'
        r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|'
        r'fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|'
        r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|'
        r'nav|noframes|ol|optgroup|option|p|param|section|source|'
        r'summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
        r'(?:\s|[/]?[>]|$)',
        re.IGNORECASE),
    # 7: a line consisting only of one open or close tag, as described by
    # common.OPENTAG / common.CLOSETAG (defined in the ``common`` module).
    re.compile(
        '^(?:' + common.OPENTAG + '|' + common.CLOSETAG + ')\\s*$',
        re.IGNORECASE),
]
|
||||
# Patterns that end an HTML block of type 1-5, indexed by block type.
# Index 0 is a placeholder so that list indices equal the type numbers.
reHtmlBlockClose = [
    re.compile(r'.'),  # dummy for 0
    re.compile(r'<\/(?:script|pre|style)>', flags=re.IGNORECASE),  # type 1
    re.compile(r'-->'),                                             # type 2
    re.compile(r'\?>'),                                             # type 3
    re.compile(r'>'),                                               # type 4
    re.compile(r'\]\]>'),                                           # type 5
]

# A line made of three or more '*', '_' or '-' with optional blanks.
reThematicBreak = re.compile(
    r'^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$'
)

# First characters that could begin some block construct; used by the
# parser as a cheap pre-filter before trying the block-start functions.
reMaybeSpecial = re.compile(r'^[#`~*+_=<>0-9-]')

# Any character that is not whitespace.
reNonSpace = re.compile(r'[^ \t\f\v\r\n]')

# List markers: a bullet character, or 1-9 digits followed by '.' or ')'.
reBulletListMarker = re.compile(r'^[*+-]')
reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])')

# One to six '#' followed by blanks or end of line.
reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')

# Opening code fence (a backtick fence may not be followed by a backtick
# later on the line) and closing code fence (only spaces may follow).
reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}')
reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')

# Setext heading underline: a run of '=' or a run of '-'.
reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')

# Any of the three line-ending conventions.
reLineEnding = re.compile(r'\r\n|\n|\r')
|
||||
|
||||
|
||||
def is_blank(s):
    """Return True when ``s`` has no non-whitespace character at all."""
    first_visible = reNonSpace.search(s)
    return first_visible is None
|
||||
|
||||
|
||||
def is_space_or_tab(s):
    """Return True when ``s`` is exactly one space or one tab character."""
    return s == ' ' or s == '\t'
|
||||
|
||||
|
||||
def peek(ln, pos):
    """Return the character of ``ln`` at index ``pos``.

    Returns None when ``pos`` is at or beyond the end of ``ln``.
    """
    return ln[pos] if pos < len(ln) else None
|
||||
|
||||
|
||||
def ends_with_blank_line(block):
    """Tell whether ``block`` ends with a blank line.

    Follows the chain of last children through 'list' and 'item' nodes.
    Every node visited is marked ``last_line_checked`` so that it is not
    descended into again on a later call.
    """
    node = block
    while node:
        if node.last_line_blank:
            return True
        # Only containers that have not been examined yet are descended into.
        descend = not node.last_line_checked and node.t in ('list', 'item')
        node.last_line_checked = True
        if not descend:
            return False
        node = node.last_child
    return False
|
||||
|
||||
|
||||
def parse_list_marker(parser, container):
    """Try to read a list marker at the parser's next non-space position.

    On success the parser is advanced past the marker and the blanks that
    follow it, and a dict describing the marker is returned (keys: type,
    tight, bullet_char, start, delimiter, padding, marker_offset).
    Returns None, leaving the parser untouched, when no marker applies.
    """
    if parser.indent >= 4:
        return None

    line = parser.current_line
    marker_start = parser.next_nonspace
    tail = line[marker_start:]
    data = {
        'type': None,
        'tight': True,  # lists are tight by default
        'bullet_char': None,
        'start': None,
        'delimiter': None,
        'padding': None,
        'marker_offset': parser.indent,
    }

    bullet = reBulletListMarker.search(tail)
    ordered = reOrderedListMarker.search(tail)
    if bullet:
        marker = bullet
        data['type'] = 'bullet'
        data['bullet_char'] = marker.group()[0]
    elif ordered and (container.t != 'paragraph' or
                      ordered.group(1) == '1'):
        # An ordered list interrupts a paragraph only when it starts at 1.
        marker = ordered
        data['type'] = 'ordered'
        data['start'] = int(marker.group(1))
        data['delimiter'] = marker.group(2)
    else:
        return None

    marker_len = len(marker.group())
    after_marker = marker_start + marker_len

    # The marker must be followed by a blank or by the end of the line.
    following = peek(line, after_marker)
    if following is not None and following != '\t' and following != ' ':
        return None

    # A marker interrupting a paragraph must have content on its line.
    if container.t == 'paragraph' and \
            reNonSpace.search(line[after_marker:]) is None:
        return None

    # Match confirmed: consume the marker, then measure the blanks after it.
    parser.advance_next_nonspace()            # to start of marker
    parser.advance_offset(marker_len, True)   # to end of marker
    spaces_start_col = parser.column
    spaces_start_offset = parser.offset
    while True:
        parser.advance_offset(1, True)
        following = peek(parser.current_line, parser.offset)
        if parser.column - spaces_start_col >= 5 or \
                not is_space_or_tab(following):
            break

    blank_item = peek(parser.current_line, parser.offset) is None
    spaces_after_marker = parser.column - spaces_start_col
    if 1 <= spaces_after_marker < 5 and not blank_item:
        data['padding'] = marker_len + spaces_after_marker
    else:
        # Too many blanks, none, or an empty item: padding is the marker
        # plus one column; rewind and consume at most one blank.
        data['padding'] = marker_len + 1
        parser.column = spaces_start_col
        parser.offset = spaces_start_offset
        if is_space_or_tab(peek(parser.current_line, parser.offset)):
            parser.advance_offset(1, True)

    return data
|
||||
|
||||
|
||||
def lists_match(list_data, item_data):
    """
    Tell whether an item belongs to a list.

    The two marker descriptions match when their type, delimiter and
    bullet character are all equal.  Used when grouping list items
    into lists.
    """
    compared_keys = ('type', 'delimiter', 'bullet_char')
    return all(
        list_data.get(key) == item_data.get(key) for key in compared_keys
    )
|
||||
|
||||
|
||||
class Block(object):
    """Base class for per-block-type behaviour.

    Subclasses override the three static hooks below; the defaults here
    all return None.
    """

    # Whether the block type takes raw text lines; set by subclasses.
    accepts_lines = None

    @staticmethod
    def continue_(parser=None, container=None):
        """Hook: check whether the current line continues ``container``."""
        return None

    @staticmethod
    def finalize(parser=None, block=None):
        """Hook: post-process ``block`` when it is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Hook: tell whether a child of node type ``t`` is allowed."""
        return None
|
||||
|
||||
|
||||
class Document(Block):
    """Behaviour of the root 'document' node."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """The document matches every line (returns 0)."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when the document is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any node type except a bare 'item' may be a child."""
        return t != 'item'
|
||||
|
||||
|
||||
class List(Block):
    """Behaviour of 'list' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """A list matches every line (returns 0)."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Mark the list as loose when blank lines separate its content.

        ``block.list_data['tight']`` is set to False if a non-final item
        ends with a blank line, or if a child of an item ends with a blank
        line and is followed by another child or another item.
        """
        entry = block.first_child
        while entry:
            # Non-final list item ending with a blank line?
            if ends_with_blank_line(entry) and entry.nxt:
                block.list_data['tight'] = False
                break
            # Look at the item's own children for blank-line separation.
            child = entry.first_child
            while child:
                if ends_with_blank_line(child) and \
                        (entry.nxt or child.nxt):
                    block.list_data['tight'] = False
                    break
                child = child.nxt
            entry = entry.nxt

    @staticmethod
    def can_contain(t):
        """Only 'item' nodes may be children of a list."""
        return t == 'item'
|
||||
|
||||
|
||||
class BlockQuote(Block):
    """Behaviour of 'block_quote' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Continue the quote when the line starts with '>'.

        Consumes the '>' and one optional following blank and returns 0;
        returns 1 when the line is indented or lacks the marker.
        """
        line = parser.current_line
        if parser.indented or peek(line, parser.next_nonspace) != '>':
            return 1
        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        if is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a block quote is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any node type except a bare 'item' may be a child."""
        return t != 'item'
|
||||
|
||||
|
||||
class Item(Block):
    """Behaviour of list 'item' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Decide whether the current line still belongs to the item.

        A blank line continues the item unless the item is still empty.
        A non-blank line continues it only when indented at least as far
        as the item's content (marker offset plus padding); that
        indentation is then consumed.  Returns 0 on match, 1 otherwise.
        """
        if parser.blank:
            if container.first_child is None:
                # Blank line after empty list item
                return 1
            parser.advance_next_nonspace()
            return 0

        content_indent = (container.list_data['marker_offset'] +
                          container.list_data['padding'])
        if parser.indent < content_indent:
            return 1
        parser.advance_offset(content_indent, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when an item is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any node type except a directly nested 'item' may be a child."""
        return t != 'item'
|
||||
|
||||
|
||||
class Heading(Block):
    """Behaviour of 'heading' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always fail to match: a heading never spans more than one line."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a heading is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """A heading has no block children."""
        return False
|
||||
|
||||
|
||||
class ThematicBreak(Block):
    """Behaviour of 'thematic_break' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always fail to match: a thematic break is a single line."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a thematic break is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """A thematic break has no block children."""
        return False
|
||||
|
||||
|
||||
class CodeBlock(Block):
    """Behaviour of 'code_block' nodes, both fenced and indented."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """Decide whether the current line belongs to the code block.

        Returns 0 when the line continues the block, 1 when it does not,
        and 2 when the line was a closing fence (the block has then been
        finalized and the whole line is consumed).
        """
        line = parser.current_line
        indent = parser.indent

        if not container.is_fenced:
            # Indented code: needs enough indentation, or a blank line.
            if indent >= CODE_INDENT:
                parser.advance_offset(CODE_INDENT, True)
            elif parser.blank:
                parser.advance_next_nonspace()
            else:
                return 1
            return 0

        # Fenced code: look for a closing fence of the same character.
        start = parser.next_nonspace
        closing = None
        if indent <= 3 and len(line) > start and \
                line[start] == container.fence_char:
            closing = reClosingCodeFence.search(line[start:])
        if closing and len(closing.group()) >= container.fence_length:
            # closing fence - we're at end of line, so we can return
            parser.finalize(container, parser.line_number)
            return 2

        # Skip up to as many leading blanks as the opening fence had.
        remaining = container.fence_offset
        while remaining > 0 and is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)
            remaining -= 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Turn the accumulated text into ``literal`` (and ``info``).

        For a fenced block the first line becomes the unescaped, stripped
        info string and the remainder the literal.  For an indented block
        trailing blank lines are collapsed into a single newline.
        """
        if block.is_fenced:
            text = block.string_content
            split_at = text.index('\n')
            info_line = text[:split_at]
            block.info = unescape_string(info_line.strip())
            block.literal = text[split_at + 1:]
        else:
            block.literal = re.sub(r'(\n *)+$', '\n', block.string_content)

        block.string_content = None

    @staticmethod
    def can_contain(t):
        """A code block has no block children."""
        return False
|
||||
|
||||
|
||||
class HtmlBlock(Block):
    """Behaviour of 'html_block' nodes."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """HTML blocks of type 6 and 7 end at a blank line (returns 1).

        Every other line continues the block (returns 0).
        """
        ends_here = parser.blank and container.html_block_type in (6, 7)
        return 1 if ends_here else 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Store the text, minus trailing blank lines, as ``literal``."""
        block.literal = re.sub(r'(\n *)+$', '', block.string_content)
        # Drop the buffer so it can be garbage-collected.
        block.string_content = None

    @staticmethod
    def can_contain(t):
        """An HTML block has no block children."""
        return False
|
||||
|
||||
|
||||
class Paragraph(Block):
    """Behaviour of 'paragraph' nodes."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """A paragraph continues on any non-blank line."""
        if parser.blank:
            return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Strip leading link reference definitions from the paragraph.

        Definitions are parsed into ``parser.refmap`` by the inline
        parser.  If at least one was consumed and nothing but whitespace
        remains, the paragraph node is removed from the tree.
        """
        consumed_any = False

        while peek(block.string_content, 0) == '[':
            consumed = parser.inline_parser.parseReference(
                block.string_content, parser.refmap)
            if not consumed:
                break
            block.string_content = block.string_content[consumed:]
            consumed_any = True

        if consumed_any and is_blank(block.string_content):
            block.unlink()

    @staticmethod
    def can_contain(t):
        """A paragraph has no block children."""
        return False
|
||||
|
||||
|
||||
class BlockStarts(object):
    """Block start functions.

    Each function inspects the parser's current line and, when the line
    opens the corresponding block, adds the new node to the tree.

    Return values:
    0 = no match
    1 = matched container, keep going
    2 = matched leaf, no more block starts
    """

    # Names of the start functions, in the order the parser tries them.
    METHODS = [
        'block_quote',
        'atx_heading',
        'fenced_code_block',
        'html_block',
        'setext_heading',
        'thematic_break',
        'list_item',
        'indented_code_block',
    ]

    @staticmethod
    def block_quote(parser, container=None):
        """Open a block quote on an unindented line starting with '>'."""
        if parser.indented or \
                peek(parser.current_line, parser.next_nonspace) != '>':
            return 0

        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        # One blank after the '>' is part of the marker.
        if is_space_or_tab(peek(parser.current_line, parser.offset)):
            parser.advance_offset(1, True)
        parser.close_unmatched_blocks()
        parser.add_child('block_quote', parser.next_nonspace)
        return 1

    @staticmethod
    def atx_heading(parser, container=None):
        """Open an ATX ('#'-prefixed) heading and consume the whole line."""
        if parser.indented:
            return 0
        marker = reATXHeadingMarker.search(
            parser.current_line[parser.next_nonspace:])
        if not marker:
            return 0

        parser.advance_next_nonspace()
        parser.advance_offset(len(marker.group()), False)
        parser.close_unmatched_blocks()
        heading = parser.add_child('heading', parser.next_nonspace)
        # The level is the number of '#' characters in the marker.
        heading.level = len(marker.group().strip())
        # Drop a closing sequence of '#'s: first the case where the rest
        # of the line is nothing else, then a trailing ' ###' suffix.
        text = parser.current_line[parser.offset:]
        text = re.sub(r'^[ \t]*#+[ \t]*$', '', text)
        text = re.sub(r'[ \t]+#+[ \t]*$', '', text)
        heading.string_content = text
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def fenced_code_block(parser, container=None):
        """Open a fenced code block and consume the fence characters."""
        if parser.indented:
            return 0
        fence = reCodeFence.search(
            parser.current_line[parser.next_nonspace:])
        if not fence:
            return 0

        fence_length = len(fence.group())
        parser.close_unmatched_blocks()
        code = parser.add_child('code_block', parser.next_nonspace)
        code.is_fenced = True
        code.fence_length = fence_length
        code.fence_char = fence.group()[0]
        code.fence_offset = parser.indent
        parser.advance_next_nonspace()
        parser.advance_offset(fence_length, False)
        return 2

    @staticmethod
    def html_block(parser, container=None):
        """Open an HTML block when the line matches a start condition.

        Start conditions 1-7 are tried in order; condition 7 is not
        allowed to interrupt a paragraph.
        """
        if parser.indented or \
                peek(parser.current_line, parser.next_nonspace) != '<':
            return 0

        text = parser.current_line[parser.next_nonspace:]
        for block_type in range(1, 8):
            if not reHtmlBlockOpen[block_type].search(text):
                continue
            if block_type == 7 and container.t == 'paragraph':
                continue
            parser.close_unmatched_blocks()
            # parser.offset is left alone: leading spaces belong to the
            # HTML block.
            node = parser.add_child('html_block', parser.offset)
            node.html_block_type = block_type
            return 2
        return 0

    @staticmethod
    def setext_heading(parser, container=None):
        """Turn the current paragraph into a setext heading.

        Applies when the line is an '=' or '-' underline directly below a
        paragraph.  Leading link reference definitions are resolved first;
        if no text is left afterwards there is no heading.
        """
        if parser.indented or container.t != 'paragraph':
            return 0
        underline = reSetextHeadingLine.search(
            parser.current_line[parser.next_nonspace:])
        if not underline:
            return 0

        parser.close_unmatched_blocks()
        # Resolve link reference definitions at the start of the text.
        while peek(container.string_content, 0) == '[':
            consumed = parser.inline_parser.parseReference(
                container.string_content, parser.refmap)
            if not consumed:
                break
            container.string_content = container.string_content[consumed:]

        if not container.string_content:
            return 0

        heading = Node('heading', container.sourcepos)
        heading.level = 1 if underline.group()[0] == '=' else 2
        heading.string_content = container.string_content
        # Swap the paragraph node for the heading node in the tree.
        container.insert_after(heading)
        container.unlink()
        parser.tip = heading
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def thematic_break(parser, container=None):
        """Open a thematic break and consume the whole line."""
        if parser.indented:
            return 0
        if not reThematicBreak.search(
                parser.current_line[parser.next_nonspace:]):
            return 0

        parser.close_unmatched_blocks()
        parser.add_child('thematic_break', parser.next_nonspace)
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def list_item(parser, container=None):
        """Open a list item, creating the enclosing list when needed."""
        if parser.indented and container.t != 'list':
            return 0
        data = parse_list_marker(parser, container)
        if not data:
            return 0

        parser.close_unmatched_blocks()

        # Start a new list unless the tip is a list of the same kind.
        if parser.tip.t != 'list' or \
                not lists_match(container.list_data, data):
            container = parser.add_child('list', parser.next_nonspace)
            container.list_data = data

        # Add the item itself.
        container = parser.add_child('item', parser.next_nonspace)
        container.list_data = data
        return 1

    @staticmethod
    def indented_code_block(parser, container=None):
        """Open an indented code block.

        Applies to a non-blank indented line when the tip is not a
        paragraph.
        """
        if not parser.indented or \
                parser.tip.t == 'paragraph' or \
                parser.blank:
            return 0

        parser.advance_offset(CODE_INDENT, True)
        parser.close_unmatched_blocks()
        parser.add_child('code_block', parser.offset)
        return 2
|
||||
|
||||
|
||||
class Parser(object):
    """Block-structure parser.

    Input is fed line by line through ``incorporate_line``; each line is
    matched against the currently open blocks and may open new ones.
    ``parse`` drives the whole process and returns the document node.
    The mapping ``Parser.blocks`` (node type name -> Block subclass) is
    attached to the class at module level.
    """

    def __init__(self, options=None):
        """Create a parser.

        ``options`` is stored on the parser and passed to the
        InlineParser.  When omitted, each parser gets its own empty dict
        (a ``{}`` default argument would be a single dict shared by every
        parser created without options).
        """
        if options is None:
            options = {}
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.block_starts = BlockStarts()
        self.tip = self.doc
        self.oldtip = self.doc
        self.current_line = ''
        self.line_number = 0
        self.offset = 0
        self.column = 0
        self.next_nonspace = 0
        self.next_nonspace_column = 0
        self.indent = 0
        self.indented = False
        self.blank = False
        self.partially_consumed_tab = False
        self.all_closed = True
        self.last_matched_container = self.doc
        self.refmap = {}
        self.last_line_length = 0
        self.inline_parser = InlineParser(options)
        self.options = options

    def add_line(self):
        """ Add a line to the block at the tip.  We assume the tip
        can accept lines -- that check should be done before calling this."""
        if self.partially_consumed_tab:
            # Skip over the tab and replace its unconsumed part by spaces
            # up to the next tab stop.
            self.offset += 1
            chars_to_tab = 4 - (self.column % 4)
            self.tip.string_content += (' ' * chars_to_tab)
        self.tip.string_content += (self.current_line[self.offset:] + '\n')

    def add_child(self, tag, offset):
        """ Add block of type tag as a child of the tip.  If the tip can't
        accept children, close and finalize it and try its parent,
        and so on til we find a block that can accept children.

        Returns the new node, which also becomes the tip.  Its source
        position starts at the current line, column ``offset + 1``."""
        while not self.blocks[self.tip.t].can_contain(tag):
            self.finalize(self.tip, self.line_number - 1)

        column_number = offset + 1
        new_block = Node(tag, [[self.line_number, column_number], [0, 0]])
        new_block.string_content = ''
        self.tip.append_child(new_block)
        self.tip = new_block
        return new_block

    def close_unmatched_blocks(self):
        """Finalize and close any unmatched blocks."""
        if not self.all_closed:
            # Walk up from the old tip to the last block that matched the
            # current line, finalizing everything in between.
            while self.oldtip != self.last_matched_container:
                parent = self.oldtip.parent
                self.finalize(self.oldtip, self.line_number - 1)
                self.oldtip = parent
            self.all_closed = True

    def find_next_nonspace(self):
        """Locate the next non-blank character from the current offset.

        Updates ``next_nonspace`` / ``next_nonspace_column`` (tabs advance
        to the next multiple of 4 columns), ``blank``, ``indent`` and
        ``indented``.  Does not move ``offset`` or ``column``.
        """
        current_line = self.current_line
        line_length = len(current_line)
        i = self.offset
        cols = self.column

        c = ''  # stays '' when the end of the line is reached
        while i < line_length:
            c = current_line[i]
            if c == ' ':
                i += 1
                cols += 1
            elif c == '\t':
                i += 1
                cols += (4 - (cols % 4))
            else:
                break
        else:
            c = ''

        self.blank = (c == '\n' or c == '\r' or c == '')
        self.next_nonspace = i
        self.next_nonspace_column = cols
        self.indent = self.next_nonspace_column - self.column
        self.indented = self.indent >= CODE_INDENT

    def advance_next_nonspace(self):
        """Move offset and column to the position found by
        ``find_next_nonspace``."""
        self.offset = self.next_nonspace
        self.column = self.next_nonspace_column
        self.partially_consumed_tab = False

    def advance_offset(self, count, columns):
        """Advance through the current line.

        ``count`` is a number of columns when ``columns`` is true and a
        number of characters otherwise.  A tab that is only partly
        covered by a column count is not stepped over; this is recorded
        in ``partially_consumed_tab``.  Stops at the end of the line.
        """
        current_line = self.current_line
        line_length = len(current_line)
        while count > 0 and self.offset < line_length:
            c = current_line[self.offset]
            if c == '\t':
                chars_to_tab = 4 - (self.column % 4)
                if columns:
                    self.partially_consumed_tab = chars_to_tab > count
                    chars_to_advance = min(count, chars_to_tab)
                    self.column += chars_to_advance
                    self.offset += 0 if self.partially_consumed_tab else 1
                    count -= chars_to_advance
                else:
                    self.partially_consumed_tab = False
                    self.column += chars_to_tab
                    self.offset += 1
                    count -= 1
            else:
                self.partially_consumed_tab = False
                self.offset += 1
                # assume ascii; block starts are ascii
                self.column += 1
                count -= 1

    def incorporate_line(self, ln):
        """Analyze a line of text and update the document appropriately.

        We parse markdown text by calling this on each line of input,
        then finalizing the document.
        """
        all_matched = True

        container = self.doc
        self.oldtip = self.tip
        self.offset = 0
        self.column = 0
        self.blank = False
        self.partially_consumed_tab = False
        self.line_number += 1

        # replace NUL characters for security
        if '\0' in ln:
            ln = ln.replace('\0', '\uFFFD')

        self.current_line = ln

        # For each containing block, try to parse the associated line start.
        # Bail out on failure: container will point to the last matching
        # block.  Set all_matched to false if not all containers match.
        while True:
            last_child = container.last_child
            if not (last_child and last_child.is_open):
                break
            container = last_child

            self.find_next_nonspace()

            rv = self.blocks[container.t].continue_(self, container)
            if rv == 0:
                # we've matched, keep going
                pass
            elif rv == 1:
                # we've failed to match a block
                all_matched = False
            elif rv == 2:
                # we've hit end of line for fenced code close and can return
                self.last_line_length = len(ln)
                return
            else:
                raise ValueError(
                    'continue_ returned illegal value, must be 0, 1, or 2')

            if not all_matched:
                # back up to last matching block
                container = container.parent
                break

        self.all_closed = (container == self.oldtip)
        self.last_matched_container = container

        matched_leaf = container.t != 'paragraph' and \
            self.blocks[container.t].accepts_lines
        starts = self.block_starts
        # Unless last matched container is a code block, try new container
        # starts, adding children to the last matched container:
        while not matched_leaf:
            self.find_next_nonspace()

            # this is a little performance optimization:
            if not self.indented and \
               not re.search(reMaybeSpecial, ln[self.next_nonspace:]):
                self.advance_next_nonspace()
                break

            for method_name in starts.METHODS:
                res = getattr(starts, method_name)(self, container)
                if res == 1:
                    # opened a container; try block starts again inside it
                    container = self.tip
                    break
                if res == 2:
                    # opened a leaf; no more block starts on this line
                    container = self.tip
                    matched_leaf = True
                    break
            else:
                # nothing matched
                self.advance_next_nonspace()
                break

        # What remains at the offset is a text line.  Add the text to the
        # appropriate container.
        if not self.all_closed and not self.blank and \
           self.tip.t == 'paragraph':
            # lazy paragraph continuation
            self.add_line()
        else:
            # not a lazy continuation
            # finalize any blocks not matched
            self.close_unmatched_blocks()
            if self.blank and container.last_child:
                container.last_child.last_line_blank = True

            t = container.t

            # Block quote lines are never blank as they start with >
            # and we don't count blanks in fenced code for purposes of
            # tight/loose lists or breaking out of lists.  We also
            # don't set last_line_blank on an empty list item, or if we
            # just closed a fenced block.
            last_line_blank = self.blank and \
                not (t == 'block_quote' or
                     (t == 'code_block' and container.is_fenced) or
                     (t == 'item' and
                      not container.first_child and
                      container.sourcepos[0][0] == self.line_number))

            # propagate last_line_blank up through parents:
            cont = container
            while cont:
                cont.last_line_blank = last_line_blank
                cont = cont.parent

            if self.blocks[t].accepts_lines:
                self.add_line()
                # if HtmlBlock, check for end condition
                if t == 'html_block' and \
                   container.html_block_type >= 1 and \
                   container.html_block_type <= 5 and \
                   re.search(
                       reHtmlBlockClose[container.html_block_type],
                       self.current_line[self.offset:]):
                    self.finalize(container, self.line_number)
            elif self.offset < len(ln) and not self.blank:
                # create a paragraph container for one line
                container = self.add_child('paragraph', self.offset)
                self.advance_next_nonspace()
                self.add_line()

        self.last_line_length = len(ln)

    def finalize(self, block, line_number):
        """ Finalize a block.  Close it and do any necessary postprocessing,
        e.g. creating string_content from strings, setting the 'tight'
        or 'loose' status of a list, and parsing the beginnings
        of paragraphs for reference definitions.  Reset the tip to the
        parent of the closed block."""
        above = block.parent
        block.is_open = False
        # End position: given line, length of the last line processed.
        block.sourcepos[1] = [line_number, self.last_line_length]

        self.blocks[block.t].finalize(self, block)

        self.tip = above

    def process_inlines(self, block):
        """
        Walk through a block & children recursively, parsing string content
        into inline content where appropriate.
        """
        walker = block.walker()
        self.inline_parser.refmap = self.refmap
        self.inline_parser.options = self.options
        event = walker.nxt()
        while event is not None:
            node = event['node']
            t = node.t
            # Inline-parse paragraphs and headings as the walker leaves them.
            if not event['entering'] and (t == 'paragraph' or t == 'heading'):
                self.inline_parser.parse(node)
            event = walker.nxt()

    def parse(self, my_input):
        """ The main parsing function.  Returns a parsed document AST."""
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.tip = self.doc
        self.refmap = {}
        self.line_number = 0
        self.last_line_length = 0
        self.offset = 0
        self.column = 0
        self.last_matched_container = self.doc
        self.current_line = ''
        lines = re.split(reLineEnding, my_input)
        length = len(lines)
        if len(my_input) > 0 and my_input[-1] == '\n':
            # ignore last blank line created by final newline
            length -= 1
        for line in lines[:length]:
            self.incorporate_line(line)
        # Close every block that is still open, innermost first.
        while (self.tip):
            self.finalize(self.tip, length)
        self.process_inlines(self.doc)
        return self.doc
|
||||
|
||||
|
||||
# Finds the boundaries inside a CamelCase name where an underscore belongs.
CAMEL_RE = re.compile("(.)([A-Z](?:[a-z]+|(?<=[a-z0-9].)))")

# Registry used by Parser: snake_case node type name -> Block subclass,
# e.g. 'block_quote' -> BlockQuote.  Built from Block's direct subclasses.
Parser.blocks = {
    CAMEL_RE.sub(r'\1_\2', block_cls.__name__).lower(): block_cls
    for block_cls in Block.__subclasses__()
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue