init
This commit is contained in:
commit
38355d2442
9083 changed files with 1225834 additions and 0 deletions
71
.venv/lib/python3.8/site-packages/asttokens/line_numbers.py
Normal file
71
.venv/lib/python3.8/site-packages/asttokens/line_numbers.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
# Copyright 2016 Grist Labs, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import bisect
|
||||
import re
|
||||
|
||||
_line_start_re = re.compile(r'^', re.M)
|
||||
|
||||
class LineNumbers(object):
    """
    Translates between flat character offsets into a text string and (line, column) positions.

    Lines are numbered from 1 and columns from 0, the convention used by tokens and AST nodes.

    The text is expected to be unicode, and every stored position counts unicode characters. A
    utf8 byte column (as produced by ast parsing) can be mapped to a unicode column with
    `from_utf8_col`.
    """

    def __init__(self, text):
        # Character offset at which each line begins, in increasing order; entry 0 is line 1.
        line_starts = []
        for match in _line_start_re.finditer(text):
            line_starts.append(match.start(0))
        self._line_offsets = line_starts

        self._text = text
        self._text_len = len(text)

        # Cache keyed by line number. Each value lists, for every utf8 byte of that line, the
        # index of the character the byte belongs to, followed by one entry for end-of-line.
        self._utf8_offset_cache = {}

    def from_utf8_col(self, line, utf8_column):
        """
        Returns the 0-based unicode column matching a 0-based utf8 byte column on the given
        1-based line. Columns outside the line are clamped to its first or last position.
        """
        cache = self._utf8_offset_cache
        if line not in cache:
            line_starts = self._line_offsets
            # The line runs up to the start of the next line, or to the end of the text when
            # this is the last line.
            if line < len(line_starts):
                stop = line_starts[line]
            else:
                stop = self._text_len
            start = line_starts[line - 1]
            line_text = self._text[start:stop]

            # One entry per utf8 byte, holding the index of the character that owns the byte.
            byte_to_char = []
            for char_index, char in enumerate(line_text):
                byte_to_char.extend([char_index] * len(char.encode('utf8')))
            byte_to_char.append(len(line_text))
            cache[line] = byte_to_char

        byte_to_char = cache[line]
        last_index = len(byte_to_char) - 1
        return byte_to_char[min(max(utf8_column, 0), last_index)]

    def line_to_offset(self, line, column):
        """
        Returns the 0-based character offset into the text for a 1-based line number and
        0-based column. Lines before the first map to 0, lines past the last map to the text
        length, and the result never exceeds the text length.
        """
        index = line - 1
        if index < 0:
            return 0
        if index >= len(self._line_offsets):
            return self._text_len
        # Negative columns are treated as column 0.
        target = self._line_offsets[index] + max(0, column)
        return min(target, self._text_len)

    def offset_to_line(self, offset):
        """
        Returns the pair (line, col), with 1-based line and 0-based column, for a 0-based
        character offset. The offset is first clamped to the range [0, length of text].
        """
        clamped = min(max(offset, 0), self._text_len)
        # The containing line is the last one whose start is at or before the offset.
        line_index = bisect.bisect_right(self._line_offsets, clamped) - 1
        column = clamped - self._line_offsets[line_index]
        return (line_index + 1, column)
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue