You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
194 lines
5.9 KiB
194 lines
5.9 KiB
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
The MIT License (MIT)
|
|
|
|
Copyright (c) 2015-present Rapptz
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a
|
|
copy of this software and associated documentation files (the "Software"),
|
|
to deal in the Software without restriction, including without limitation
|
|
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
and/or sell copies of the Software, and to permit persons to whom the
|
|
Software is furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
DEALINGS IN THE SOFTWARE.
|
|
"""
|
|
|
|
from .errors import UnexpectedQuoteError, InvalidEndOfQuotedStringError, ExpectedClosingQuoteError
|
|
|
|
# Map from each recognised opening quote character to the closing quote
# character that terminates it.
#
# NOTE(review): the two entries written as ``\u`` escapes had been collapsed
# by what looks like Unicode compatibility normalisation: one showed up as a
# second ASCII '"' key (which is not even valid syntax when written with bare
# double quotes) and the other as an exact duplicate of the CJK corner bracket
# entry. They are spelled as escapes so they cannot be folded again.
_quotes = {
    '"': '"',
    "‘": "’",
    "‚": "‛",
    "“": "”",
    "„": "‟",
    "⹂": "⹂",
    "「": "」",
    "『": "』",
    "〝": "〞",
    "﹁": "﹂",
    "﹃": "﹄",
    "\uff02": "\uff02",  # FULLWIDTH QUOTATION MARK
    "\uff62": "\uff63",  # HALFWIDTH LEFT / RIGHT CORNER BRACKET
    "«": "»",
    "‹": "›",
    "《": "》",
    "〈": "〉",
}

# Every character that can act as a quote, whether opening or closing.
_all_quotes = set(_quotes.keys()) | set(_quotes.values())
|
|
|
|
class StringView:
    """A cursor over a string buffer, used to consume it piece by piece.

    Attributes
    ----------
    index
        Position of the cursor inside ``buffer``.
    buffer
        The string being read.
    end
        ``len(buffer)`` as measured when the view was created.
    previous
        Value ``index`` had before the most recent cursor-moving call;
        :meth:`undo` restores it. Only one step of history is kept.
    """

    def __init__(self, buffer):
        self.index = 0
        self.buffer = buffer
        self.end = len(buffer)
        self.previous = 0

    @property
    def current(self):
        """The character under the cursor, or ``None`` once at the end."""
        return None if self.eof else self.buffer[self.index]

    @property
    def eof(self):
        """``True`` when the cursor is at or beyond ``end``."""
        return self.index >= self.end

    def undo(self):
        """Move the cursor back to where it was before the last operation."""
        self.index = self.previous

    def _run_length(self, whitespace):
        """Count characters from the cursor whose ``isspace()`` equals *whitespace*.

        Returns 0 when the view is already at the end. The cursor is not moved.
        """
        if self.eof:
            return 0

        buffer = self.buffer
        limit = len(buffer)
        start = self.index
        pos = start
        # Bounded scan: stop at the end of the buffer or at the first
        # character of the other kind.
        while pos < limit and buffer[pos].isspace() == whitespace:
            pos += 1
        return pos - start

    def skip_ws(self):
        """Advance past any whitespace at the cursor.

        Returns ``True`` if the cursor moved, ``False`` otherwise.
        """
        pos = self._run_length(True)
        self.previous = self.index
        self.index += pos
        return self.previous != self.index

    def skip_string(self, string):
        """Advance past *string* if the buffer continues with it.

        Returns ``True`` and moves the cursor when it matches; otherwise
        returns ``False`` and leaves both ``index`` and ``previous`` alone.
        """
        strlen = len(string)
        if self.buffer[self.index:self.index + strlen] == string:
            self.previous = self.index
            self.index += strlen
            return True
        return False

    def read_rest(self):
        """Return everything from the cursor onwards and move to ``end``."""
        result = self.buffer[self.index:]
        self.previous = self.index
        self.index = self.end
        return result

    def read(self, n):
        """Return up to *n* characters from the cursor and advance by *n*.

        The cursor is advanced by *n* even when fewer characters remain.
        """
        result = self.buffer[self.index:self.index + n]
        self.previous = self.index
        self.index += n
        return result

    def get(self):
        """Advance the cursor by one and return the character now under it.

        Returns ``None`` when there is no such character; the cursor is
        still advanced in that case.
        """
        try:
            result = self.buffer[self.index + 1]
        except IndexError:
            result = None

        self.previous = self.index
        self.index += 1
        return result

    def get_word(self):
        """Return the run of non-whitespace characters at the cursor.

        The cursor is left on the whitespace character that ended the word,
        or at the end of the buffer. Returns an empty string when the cursor
        is on whitespace or at the end.
        """
        pos = self._run_length(False)
        self.previous = self.index
        result = self.buffer[self.index:self.index + pos]
        self.index += pos
        return result

    def get_quoted_word(self):
        """Read one word, honouring quotes and backslash-escaped quotes.

        If the character under the cursor is an opening quote from
        ``_quotes``, everything up to the matching closing quote is returned
        without the surrounding quotes. Otherwise characters are collected
        up to the next whitespace character or the end of the buffer.

        A backslash followed by a quote character yields that quote
        character literally (the word's own opening/closing pair when
        quoted, any known quote when unquoted). A backslash followed by
        anything else is kept as a plain backslash.

        Returns
        -------
        The word, or ``None`` when the view is already at the end.

        Raises
        ------
        ExpectedClosingQuoteError
            The buffer ended inside a quoted word.
        InvalidEndOfQuotedStringError
            A closing quote was followed by something other than whitespace
            or the end of the buffer.
        UnexpectedQuoteError
            A quote character appeared after the first character of an
            unquoted word.
        """
        current = self.current
        if current is None:
            return None

        close_quote = _quotes.get(current)
        is_quoted = bool(close_quote)
        if is_quoted:
            # The opening quote itself is not part of the word.
            result = []
            _escaped_quotes = (current, close_quote)
        else:
            result = [current]
            _escaped_quotes = _all_quotes

        while not self.eof:
            current = self.get()
            if not current:
                if is_quoted:
                    # unexpected EOF
                    raise ExpectedClosingQuoteError(close_quote)
                return ''.join(result)

            # currently we accept strings in the format of "hello world"
            # to embed a quote inside the string you must escape it: "a \"world\""
            if current == '\\':
                next_char = self.get()
                if not next_char:
                    # string ends with \ and no character after it
                    if is_quoted:
                        # if we're quoted then we're expecting a closing quote
                        raise ExpectedClosingQuoteError(close_quote)
                    # Unquoted: finish the word here. The trailing backslash
                    # itself is not added to the result.
                    return ''.join(result)

                if next_char in _escaped_quotes:
                    # escaped quote
                    result.append(next_char)
                else:
                    # Not an escape we recognise: step back onto the
                    # backslash so the following character is read again on
                    # the next iteration, and keep the backslash literally.
                    self.undo()
                    result.append(current)
                continue

            if not is_quoted and current in _all_quotes:
                # we aren't quoted
                raise UnexpectedQuoteError(current)

            # closing quote
            if is_quoted and current == close_quote:
                # Consume the character after the closing quote; it must be
                # whitespace or the end of the buffer.
                next_char = self.get()
                valid_eof = not next_char or next_char.isspace()
                if not valid_eof:
                    raise InvalidEndOfQuotedStringError(next_char)

                # we're quoted so it's okay
                return ''.join(result)

            if current.isspace() and not is_quoted:
                # end of word found
                return ''.join(result)

            result.append(current)

    def __repr__(self):
        return '<StringView pos: {0.index} prev: {0.previous} end: {0.end} eof: {0.eof}>'.format(self)
|
|
|