add
38
ccxt/static_dependencies/lark/__init__.py
Normal file
@@ -0,0 +1,38 @@
from .exceptions import (
    GrammarError,
    LarkError,
    LexError,
    ParseError,
    UnexpectedCharacters,
    UnexpectedEOF,
    UnexpectedInput,
    UnexpectedToken,
)
from .lark import Lark
from .lexer import Token
from .tree import ParseTree, Tree
from .utils import logger
from .visitors import Discard, Transformer, Transformer_NonRecursive, Visitor, v_args

__version__: str = "1.2.0"

__all__ = (
    "GrammarError",
    "LarkError",
    "LexError",
    "ParseError",
    "UnexpectedCharacters",
    "UnexpectedEOF",
    "UnexpectedInput",
    "UnexpectedToken",
    "Lark",
    "Token",
    "ParseTree",
    "Tree",
    "logger",
    "Discard",
    "Transformer",
    "Transformer_NonRecursive",
    "Visitor",
    "v_args",
)
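This module re-exports the public API of the vendored parser: the Lark class, the Token/Tree/ParseTree types, the exception hierarchy, and the visitor/transformer helpers. A minimal usage sketch, assuming the vendored package is importable as ccxt.static_dependencies.lark (upstream it is simply "lark"); the grammar and input below are illustrative:

from ccxt.static_dependencies.lark import Lark, Tree

parser = Lark(r'''
    start: pair ("," pair)*
    pair: WORD "=" NUMBER

    %import common.WORD
    %import common.NUMBER
    %import common.WS
    %ignore WS
''', parser="lalr")

tree = parser.parse("retries=3, timeout=30")
assert isinstance(tree, Tree)
print(tree.pretty())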
6
ccxt/static_dependencies/lark/__pyinstaller/__init__.py
Normal file
@@ -0,0 +1,6 @@
# For usage of lark with PyInstaller. See https://pyinstaller-sample-hook.readthedocs.io/en/latest/index.html

import os

def get_hook_dirs():
    return [os.path.dirname(__file__)]
14
ccxt/static_dependencies/lark/__pyinstaller/hook-lark.py
Normal file
@@ -0,0 +1,14 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2017-2020, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------

from PyInstaller.utils.hooks import collect_data_files

datas = collect_data_files('lark')
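Usage note: PyInstaller discovers this hook directory through the "pyinstaller40" entry-point group, which the upstream lark distribution declares in its packaging metadata. The excerpt below is an illustrative sketch of that declaration, for reference only; the vendored copy inside ccxt does not register it:

# setup.py of the upstream lark package (illustrative excerpt)
from setuptools import setup

setup(
    name="lark",
    entry_points={
        "pyinstaller40": [
            "hook-dirs = lark.__pyinstaller:get_hook_dirs",
        ],
    },
)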
59
ccxt/static_dependencies/lark/ast_utils.py
Normal file
@@ -0,0 +1,59 @@
"""
Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree (AST defined in classes)
"""

import inspect, re
import types
from typing import Optional, Callable

from lark import Transformer, v_args

class Ast:
    """Abstract class

    Subclasses will be collected by `create_transformer()`
    """
    pass

class AsList:
    """Abstract class

    Subclasses will be instantiated with the parse results as a single list, instead of as arguments.
    """

class WithMeta:
    """Abstract class

    Subclasses will be instantiated with the Meta instance of the tree. (see ``v_args`` for more detail)
    """
    pass

def camel_to_snake(name):
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()

def create_transformer(ast_module: types.ModuleType,
                       transformer: Optional[Transformer]=None,
                       decorator_factory: Callable=v_args) -> Transformer:
    """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.

    For each class, we create a corresponding rule in the transformer, with a matching name.
    CamelCase names will be converted into snake_case. Example: "CodeBlock" -> "code_block".

    Classes starting with an underscore (`_`) will be skipped.

    Parameters:
        ast_module: A Python module containing all the subclasses of ``ast_utils.Ast``
        transformer (Optional[Transformer]): An initial transformer. Its attributes may be overwritten.
        decorator_factory (Callable): An optional callable accepting two booleans, inline, and meta,
            and returning a decorator for the methods of ``transformer``. (default: ``v_args``).
    """
    t = transformer or Transformer()

    for name, obj in inspect.getmembers(ast_module):
        if not name.startswith('_') and inspect.isclass(obj):
            if issubclass(obj, Ast):
                wrapper = decorator_factory(inline=not issubclass(obj, AsList), meta=issubclass(obj, WithMeta))
                obj = wrapper(obj).__get__(t)
                setattr(t, camel_to_snake(name), obj)

    return t
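A minimal sketch of create_transformer in use, modeled on upstream lark's examples/advanced/create_ast.py; the Assign class and the grammar below are illustrative, not part of this module:

import sys
from dataclasses import dataclass

from lark import Lark, ast_utils


class _Ast(ast_utils.Ast):
    # Skipped by create_transformer() because the name starts with "_"
    pass


@dataclass
class Assign(_Ast):
    # Becomes the handler for the rule "assign" (CamelCase -> snake_case)
    name: str
    value: str


parser = Lark(r"""
    start: assign+
    assign: CNAME "=" ESCAPED_STRING

    %import common.CNAME
    %import common.ESCAPED_STRING
    %import common.WS
    %ignore WS
""", parser="lalr")

to_ast = ast_utils.create_transformer(sys.modules[__name__])
print(to_ast.transform(parser.parse('x = "1"')))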
86
ccxt/static_dependencies/lark/common.py
Normal file
@@ -0,0 +1,86 @@
from copy import deepcopy
import sys
from types import ModuleType
from typing import Callable, Collection, Dict, Optional, TYPE_CHECKING, List

if TYPE_CHECKING:
    from .lark import PostLex
    from .lexer import Lexer
    from .grammar import Rule
    from typing import Union, Type
    from typing import Literal
    if sys.version_info >= (3, 10):
        from typing import TypeAlias
    else:
        from typing_extensions import TypeAlias

from .utils import Serialize
from .lexer import TerminalDef, Token

###{standalone

_ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]'
_LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
_LexerCallback = Callable[[Token], Token]
ParserCallbacks = Dict[str, Callable]

class LexerConf(Serialize):
    __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
    __serialize_namespace__ = TerminalDef,

    terminals: Collection[TerminalDef]
    re_module: ModuleType
    ignore: Collection[str]
    postlex: 'Optional[PostLex]'
    callbacks: Dict[str, _LexerCallback]
    g_regex_flags: int
    skip_validation: bool
    use_bytes: bool
    lexer_type: Optional[_LexerArgType]
    strict: bool

    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None,
                 callbacks: Optional[Dict[str, _LexerCallback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False, strict: bool=False):
        self.terminals = terminals
        self.terminals_by_name = {t.name: t for t in self.terminals}
        assert len(self.terminals) == len(self.terminals_by_name)
        self.ignore = ignore
        self.postlex = postlex
        self.callbacks = callbacks or {}
        self.g_regex_flags = g_regex_flags
        self.re_module = re_module
        self.skip_validation = skip_validation
        self.use_bytes = use_bytes
        self.strict = strict
        self.lexer_type = None

    def _deserialize(self):
        self.terminals_by_name = {t.name: t for t in self.terminals}

    def __deepcopy__(self, memo=None):
        return type(self)(
            deepcopy(self.terminals, memo),
            self.re_module,
            deepcopy(self.ignore, memo),
            deepcopy(self.postlex, memo),
            deepcopy(self.callbacks, memo),
            deepcopy(self.g_regex_flags, memo),
            deepcopy(self.skip_validation, memo),
            deepcopy(self.use_bytes, memo),
        )

class ParserConf(Serialize):
    __serialize_fields__ = 'rules', 'start', 'parser_type'

    rules: List['Rule']
    callbacks: ParserCallbacks
    start: List[str]
    parser_type: _ParserArgType

    def __init__(self, rules: List['Rule'], callbacks: ParserCallbacks, start: List[str]):
        assert isinstance(start, list)
        self.rules = rules
        self.callbacks = callbacks
        self.start = start

###}
292
ccxt/static_dependencies/lark/exceptions.py
Normal file
@@ -0,0 +1,292 @@
|
||||
from .utils import logger, NO_VALUE
|
||||
from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .lexer import Token
|
||||
from .parsers.lalr_interactive_parser import InteractiveParser
|
||||
from .tree import Tree
|
||||
|
||||
###{standalone
|
||||
|
||||
class LarkError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ConfigurationError(LarkError, ValueError):
|
||||
pass
|
||||
|
||||
|
||||
def assert_config(value, options: Collection, msg='Got %r, expected one of %s'):
|
||||
if value not in options:
|
||||
raise ConfigurationError(msg % (value, options))
|
||||
|
||||
|
||||
class GrammarError(LarkError):
|
||||
pass
|
||||
|
||||
|
||||
class ParseError(LarkError):
|
||||
pass
|
||||
|
||||
|
||||
class LexError(LarkError):
|
||||
pass
|
||||
|
||||
T = TypeVar('T')
|
||||
|
||||
class UnexpectedInput(LarkError):
|
||||
"""UnexpectedInput Error.
|
||||
|
||||
Used as a base class for the following exceptions:
|
||||
|
||||
- ``UnexpectedCharacters``: The lexer encountered an unexpected string
|
||||
- ``UnexpectedToken``: The parser received an unexpected token
|
||||
- ``UnexpectedEOF``: The parser expected a token, but the input ended
|
||||
|
||||
After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
|
||||
"""
|
||||
line: int
|
||||
column: int
|
||||
pos_in_stream = None
|
||||
state: Any
|
||||
_terminals_by_name = None
|
||||
interactive_parser: 'InteractiveParser'
|
||||
|
||||
def get_context(self, text: str, span: int=40) -> str:
|
||||
"""Returns a pretty string pinpointing the error in the text,
|
||||
with span amount of context characters around it.
|
||||
|
||||
Note:
|
||||
The parser doesn't hold a copy of the text it has to parse,
|
||||
so you have to provide it again
|
||||
"""
|
||||
assert self.pos_in_stream is not None, self
|
||||
pos = self.pos_in_stream
|
||||
start = max(pos - span, 0)
|
||||
end = pos + span
|
||||
if not isinstance(text, bytes):
|
||||
before = text[start:pos].rsplit('\n', 1)[-1]
|
||||
after = text[pos:end].split('\n', 1)[0]
|
||||
return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
|
||||
else:
|
||||
before = text[start:pos].rsplit(b'\n', 1)[-1]
|
||||
after = text[pos:end].split(b'\n', 1)[0]
|
||||
return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
|
||||
|
||||
def match_examples(self, parse_fn: 'Callable[[str], Tree]',
|
||||
examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
|
||||
token_type_match_fallback: bool=False,
|
||||
use_accepts: bool=True
|
||||
) -> Optional[T]:
|
||||
"""Allows you to detect what's wrong in the input text by matching
|
||||
against example errors.
|
||||
|
||||
Given a parser instance and a dictionary mapping some label with
|
||||
some malformed syntax examples, it'll return the label for the
|
||||
example that best matches the current error. The function will
|
||||
iterate the dictionary until it finds a matching error, and
|
||||
return the corresponding value.
|
||||
|
||||
For an example usage, see `examples/error_reporting_lalr.py`
|
||||
|
||||
Parameters:
|
||||
parse_fn: parse function (usually ``lark_instance.parse``)
|
||||
examples: dictionary of ``{label: [malformed example strings]}``.
|
||||
use_accepts: Recommended to keep this as ``use_accepts=True``.
|
||||
"""
|
||||
assert self.state is not None, "Not supported for this exception"
|
||||
|
||||
if isinstance(examples, Mapping):
|
||||
examples = examples.items()
|
||||
|
||||
candidate = (None, False)
|
||||
for i, (label, example) in enumerate(examples):
|
||||
assert not isinstance(example, str), "Expecting a list"
|
||||
|
||||
for j, malformed in enumerate(example):
|
||||
try:
|
||||
parse_fn(malformed)
|
||||
except UnexpectedInput as ut:
|
||||
if ut.state == self.state:
|
||||
if (
|
||||
use_accepts
|
||||
and isinstance(self, UnexpectedToken)
|
||||
and isinstance(ut, UnexpectedToken)
|
||||
and ut.accepts != self.accepts
|
||||
):
|
||||
logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
|
||||
(self.state, self.accepts, ut.accepts, i, j))
|
||||
continue
|
||||
if (
|
||||
isinstance(self, (UnexpectedToken, UnexpectedEOF))
|
||||
and isinstance(ut, (UnexpectedToken, UnexpectedEOF))
|
||||
):
|
||||
if ut.token == self.token: # Try exact match first
|
||||
logger.debug("Exact Match at example [%s][%s]" % (i, j))
|
||||
return label
|
||||
|
||||
if token_type_match_fallback:
|
||||
# Fallback to token types match
|
||||
if (ut.token.type == self.token.type) and not candidate[-1]:
|
||||
logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
|
||||
candidate = label, True
|
||||
|
||||
if candidate[0] is None:
|
||||
logger.debug("Same State match at example [%s][%s]" % (i, j))
|
||||
candidate = label, False
|
||||
|
||||
return candidate[0]
|
||||
|
||||
def _format_expected(self, expected):
|
||||
if self._terminals_by_name:
|
||||
d = self._terminals_by_name
|
||||
expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
|
||||
return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
|
||||
|
||||
|
||||
class UnexpectedEOF(ParseError, UnexpectedInput):
|
||||
"""An exception that is raised by the parser, when the input ends while it still expects a token.
|
||||
"""
|
||||
expected: 'List[Token]'
|
||||
|
||||
def __init__(self, expected, state=None, terminals_by_name=None):
|
||||
super(UnexpectedEOF, self).__init__()
|
||||
|
||||
self.expected = expected
|
||||
self.state = state
|
||||
from .lexer import Token
|
||||
self.token = Token("<EOF>", "") # , line=-1, column=-1, pos_in_stream=-1)
|
||||
self.pos_in_stream = -1
|
||||
self.line = -1
|
||||
self.column = -1
|
||||
self._terminals_by_name = terminals_by_name
|
||||
|
||||
|
||||
def __str__(self):
|
||||
message = "Unexpected end-of-input. "
|
||||
message += self._format_expected(self.expected)
|
||||
return message
|
||||
|
||||
|
||||
class UnexpectedCharacters(LexError, UnexpectedInput):
|
||||
"""An exception that is raised by the lexer, when it cannot match the next
|
||||
string of characters to any of its terminals.
|
||||
"""
|
||||
|
||||
allowed: Set[str]
|
||||
considered_tokens: Set[Any]
|
||||
|
||||
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
|
||||
terminals_by_name=None, considered_rules=None):
|
||||
super(UnexpectedCharacters, self).__init__()
|
||||
|
||||
# TODO considered_tokens and allowed can be figured out using state
|
||||
self.line = line
|
||||
self.column = column
|
||||
self.pos_in_stream = lex_pos
|
||||
self.state = state
|
||||
self._terminals_by_name = terminals_by_name
|
||||
|
||||
self.allowed = allowed
|
||||
self.considered_tokens = considered_tokens
|
||||
self.considered_rules = considered_rules
|
||||
self.token_history = token_history
|
||||
|
||||
if isinstance(seq, bytes):
|
||||
self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
|
||||
else:
|
||||
self.char = seq[lex_pos]
|
||||
self._context = self.get_context(seq)
|
||||
|
||||
|
||||
def __str__(self):
|
||||
message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column)
|
||||
message += '\n\n' + self._context
|
||||
if self.allowed:
|
||||
message += self._format_expected(self.allowed)
|
||||
if self.token_history:
|
||||
message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
|
||||
return message
|
||||
|
||||
|
||||
class UnexpectedToken(ParseError, UnexpectedInput):
|
||||
"""An exception that is raised by the parser, when the token it received
|
||||
doesn't match any valid step forward.
|
||||
|
||||
Parameters:
|
||||
token: The mismatched token
|
||||
expected: The set of expected tokens
|
||||
considered_rules: Which rules were considered, to deduce the expected tokens
|
||||
state: A value representing the parser state. Do not rely on its value or type.
|
||||
interactive_parser: An instance of ``InteractiveParser``, that is initialized to the point of failure,
|
||||
and can be used for debugging and error handling.
|
||||
|
||||
Note: These parameters are available as attributes of the instance.
|
||||
"""
|
||||
|
||||
expected: Set[str]
|
||||
considered_rules: Set[str]
|
||||
|
||||
def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
|
||||
super(UnexpectedToken, self).__init__()
|
||||
|
||||
# TODO considered_rules and expected can be figured out using state
|
||||
self.line = getattr(token, 'line', '?')
|
||||
self.column = getattr(token, 'column', '?')
|
||||
self.pos_in_stream = getattr(token, 'start_pos', None)
|
||||
self.state = state
|
||||
|
||||
self.token = token
|
||||
self.expected = expected # XXX deprecate? `accepts` is better
|
||||
self._accepts = NO_VALUE
|
||||
self.considered_rules = considered_rules
|
||||
self.interactive_parser = interactive_parser
|
||||
self._terminals_by_name = terminals_by_name
|
||||
self.token_history = token_history
|
||||
|
||||
|
||||
@property
|
||||
def accepts(self) -> Set[str]:
|
||||
if self._accepts is NO_VALUE:
|
||||
self._accepts = self.interactive_parser and self.interactive_parser.accepts()
|
||||
return self._accepts
|
||||
|
||||
def __str__(self):
|
||||
message = ("Unexpected token %r at line %s, column %s.\n%s"
|
||||
% (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
|
||||
if self.token_history:
|
||||
message += "Previous tokens: %r\n" % self.token_history
|
||||
|
||||
return message
|
||||
|
||||
|
||||
|
||||
class VisitError(LarkError):
|
||||
"""VisitError is raised when visitors are interrupted by an exception
|
||||
|
||||
It provides the following attributes for inspection:
|
||||
|
||||
Parameters:
|
||||
rule: the name of the visit rule that failed
|
||||
obj: the tree-node or token that was being processed
|
||||
orig_exc: the exception that caused it to fail
|
||||
|
||||
Note: These parameters are available as attributes
|
||||
"""
|
||||
|
||||
obj: 'Union[Tree, Token]'
|
||||
orig_exc: Exception
|
||||
|
||||
def __init__(self, rule, obj, orig_exc):
|
||||
message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
|
||||
super(VisitError, self).__init__(message)
|
||||
|
||||
self.rule = rule
|
||||
self.obj = obj
|
||||
self.orig_exc = orig_exc
|
||||
|
||||
|
||||
class MissingVariableError(LarkError):
|
||||
pass
|
||||
|
||||
###}
|
||||
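The helper methods on UnexpectedInput are meant to be called after catching the exception. A toy sketch; the grammar, input and labels below are illustrative:

from lark import Lark, UnexpectedInput

parser = Lark('start: "a" "b" "c"\n%ignore " "', parser="lalr")

text = "a c"
try:
    parser.parse(text)
except UnexpectedInput as u:
    # The parser does not keep a copy of the text, so it has to be passed in again
    print(u.get_context(text))
    label = u.match_examples(parser.parse, {
        "missing b": ["a c"],
        "missing c": ["a b"],
    })
    print(label)  # expected to print "missing b"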
130
ccxt/static_dependencies/lark/grammar.py
Normal file
@@ -0,0 +1,130 @@
|
||||
from typing import Optional, Tuple, ClassVar, Sequence
|
||||
|
||||
from .utils import Serialize
|
||||
|
||||
###{standalone
|
||||
TOKEN_DEFAULT_PRIORITY = 0
|
||||
|
||||
|
||||
class Symbol(Serialize):
|
||||
__slots__ = ('name',)
|
||||
|
||||
name: str
|
||||
is_term: ClassVar[bool] = NotImplemented
|
||||
|
||||
def __init__(self, name: str) -> None:
|
||||
self.name = name
|
||||
|
||||
def __eq__(self, other):
|
||||
assert isinstance(other, Symbol), other
|
||||
return self.is_term == other.is_term and self.name == other.name
|
||||
|
||||
def __ne__(self, other):
|
||||
return not (self == other)
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.name)
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%r)' % (type(self).__name__, self.name)
|
||||
|
||||
fullrepr = property(__repr__)
|
||||
|
||||
def renamed(self, f):
|
||||
return type(self)(f(self.name))
|
||||
|
||||
|
||||
class Terminal(Symbol):
|
||||
__serialize_fields__ = 'name', 'filter_out'
|
||||
|
||||
is_term: ClassVar[bool] = True
|
||||
|
||||
def __init__(self, name, filter_out=False):
|
||||
self.name = name
|
||||
self.filter_out = filter_out
|
||||
|
||||
@property
|
||||
def fullrepr(self):
|
||||
return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
|
||||
|
||||
def renamed(self, f):
|
||||
return type(self)(f(self.name), self.filter_out)
|
||||
|
||||
|
||||
class NonTerminal(Symbol):
|
||||
__serialize_fields__ = 'name',
|
||||
|
||||
is_term: ClassVar[bool] = False
|
||||
|
||||
|
||||
class RuleOptions(Serialize):
|
||||
__serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
|
||||
|
||||
keep_all_tokens: bool
|
||||
expand1: bool
|
||||
priority: Optional[int]
|
||||
template_source: Optional[str]
|
||||
empty_indices: Tuple[bool, ...]
|
||||
|
||||
def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
|
||||
self.keep_all_tokens = keep_all_tokens
|
||||
self.expand1 = expand1
|
||||
self.priority = priority
|
||||
self.template_source = template_source
|
||||
self.empty_indices = empty_indices
|
||||
|
||||
def __repr__(self):
|
||||
return 'RuleOptions(%r, %r, %r, %r)' % (
|
||||
self.keep_all_tokens,
|
||||
self.expand1,
|
||||
self.priority,
|
||||
self.template_source
|
||||
)
|
||||
|
||||
|
||||
class Rule(Serialize):
|
||||
"""
|
||||
origin : a symbol
|
||||
expansion : a list of symbols
|
||||
order : index of this expansion amongst all rules of the same name
|
||||
"""
|
||||
__slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
|
||||
|
||||
__serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
|
||||
__serialize_namespace__ = Terminal, NonTerminal, RuleOptions
|
||||
|
||||
origin: NonTerminal
|
||||
expansion: Sequence[Symbol]
|
||||
order: int
|
||||
alias: Optional[str]
|
||||
options: RuleOptions
|
||||
_hash: int
|
||||
|
||||
def __init__(self, origin: NonTerminal, expansion: Sequence[Symbol],
|
||||
order: int=0, alias: Optional[str]=None, options: Optional[RuleOptions]=None):
|
||||
self.origin = origin
|
||||
self.expansion = expansion
|
||||
self.alias = alias
|
||||
self.order = order
|
||||
self.options = options or RuleOptions()
|
||||
self._hash = hash((self.origin, tuple(self.expansion)))
|
||||
|
||||
def _deserialize(self):
|
||||
self._hash = hash((self.origin, tuple(self.expansion)))
|
||||
|
||||
def __str__(self):
|
||||
return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
|
||||
|
||||
def __repr__(self):
|
||||
return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
|
||||
|
||||
def __hash__(self):
|
||||
return self._hash
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, Rule):
|
||||
return False
|
||||
return self.origin == other.origin and self.expansion == other.expansion
|
||||
|
||||
|
||||
###}
|
||||
0
ccxt/static_dependencies/lark/grammars/__init__.py
Normal file
59
ccxt/static_dependencies/lark/grammars/common.lark
Normal file
@@ -0,0 +1,59 @@
// Basic terminals for common use


//
// Numbers
//

DIGIT: "0".."9"
HEXDIGIT: "a".."f"|"A".."F"|DIGIT

INT: DIGIT+
SIGNED_INT: ["+"|"-"] INT
DECIMAL: INT "." INT? | "." INT

// float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/
_EXP: ("e"|"E") SIGNED_INT
FLOAT: INT _EXP | DECIMAL _EXP?
SIGNED_FLOAT: ["+"|"-"] FLOAT

NUMBER: FLOAT | INT
SIGNED_NUMBER: ["+"|"-"] NUMBER

//
// Strings
//
_STRING_INNER: /.*?/
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/

ESCAPED_STRING : "\"" _STRING_ESC_INNER "\""


//
// Names (Variables)
//
LCASE_LETTER: "a".."z"
UCASE_LETTER: "A".."Z"

LETTER: UCASE_LETTER | LCASE_LETTER
WORD: LETTER+

CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*


//
// Whitespace
//
WS_INLINE: (" "|/\t/)+
WS: /[ \t\f\r\n]/+

CR : /\r/
LF : /\n/
NEWLINE: (CR? LF)+


// Comments
SH_COMMENT: /#[^\n]*/
CPP_COMMENT: /\/\/[^\n]*/
C_COMMENT: "/*" /(.|\n)*?/ "*/"
SQL_COMMENT: /--[^\n]*/
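These definitions are pulled into user grammars with %import. A small sketch; the CSV-like grammar and sample input are illustrative:

from lark import Lark

csv_parser = Lark(r"""
    start: row (_NL row)*
    row: value ("," value)*
    value: ESCAPED_STRING | SIGNED_NUMBER

    %import common.ESCAPED_STRING
    %import common.SIGNED_NUMBER
    %import common.NEWLINE -> _NL
    %import common.WS_INLINE
    %ignore WS_INLINE
""", parser="lalr")

print(csv_parser.parse('"a", 1, -2.5\n"b", 3').pretty())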
62
ccxt/static_dependencies/lark/grammars/lark.lark
Normal file
@@ -0,0 +1,62 @@
# Lark grammar of Lark's syntax
# Note: Lark is not bootstrapped, its parser is implemented in load_grammar.py

start: (_item? _NL)* _item?

_item: rule
     | token
     | statement

rule: RULE rule_params priority? ":" expansions
token: TOKEN token_params priority? ":" expansions

rule_params: ["{" RULE ("," RULE)* "}"]
token_params: ["{" TOKEN ("," TOKEN)* "}"]

priority: "." NUMBER

statement: "%ignore" expansions                -> ignore
         | "%import" import_path ["->" name]   -> import
         | "%import" import_path name_list     -> multi_import
         | "%override" rule                    -> override_rule
         | "%declare" name+                    -> declare

!import_path: "."? name ("." name)*
name_list: "(" name ("," name)* ")"

?expansions: alias (_VBAR alias)*

?alias: expansion ["->" RULE]

?expansion: expr*

?expr: atom [OP | "~" NUMBER [".." NUMBER]]

?atom: "(" expansions ")"
     | "[" expansions "]" -> maybe
     | value

?value: STRING ".." STRING -> literal_range
      | name
      | (REGEXP | STRING) -> literal
      | name "{" value ("," value)* "}" -> template_usage

name: RULE
    | TOKEN

_VBAR: _NL? "|"
OP: /[+*]|[?](?![a-z])/
RULE: /!?[_?]?[a-z][_a-z0-9]*/
TOKEN: /_?[A-Z][_A-Z0-9]*/
STRING: _STRING "i"?
REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/])*?\/[imslux]*/
_NL: /(\r?\n)+\s*/

%import common.ESCAPED_STRING -> _STRING
%import common.SIGNED_INT -> NUMBER
%import common.WS_INLINE

COMMENT: /\s*/ "//" /[^\n]/* | /\s*/ "#" /[^\n]/*

%ignore WS_INLINE
%ignore COMMENT
302
ccxt/static_dependencies/lark/grammars/python.lark
Normal file
@@ -0,0 +1,302 @@
|
||||
// Python 3 grammar for Lark
|
||||
|
||||
// This grammar should parse all python 3.x code successfully.
|
||||
|
||||
// Adapted from: https://docs.python.org/3/reference/grammar.html
|
||||
|
||||
// Start symbols for the grammar:
|
||||
// single_input is a single interactive statement;
|
||||
// file_input is a module or sequence of commands read from an input file;
|
||||
// eval_input is the input for the eval() functions.
|
||||
// NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
//
|
||||
|
||||
single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE
|
||||
file_input: (_NEWLINE | stmt)*
|
||||
eval_input: testlist _NEWLINE*
|
||||
|
||||
decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||
|
||||
async_funcdef: "async" funcdef
|
||||
funcdef: "def" name "(" [parameters] ")" ["->" test] ":" suite
|
||||
|
||||
parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]]
|
||||
| starparams
|
||||
| kwparams
|
||||
|
||||
SLASH: "/" // Otherwise the it will completely disappear and it will be undisguisable in the result
|
||||
starparams: (starparam | starguard) poststarparams
|
||||
starparam: "*" typedparam
|
||||
starguard: "*"
|
||||
poststarparams: ("," paramvalue)* ["," kwparams]
|
||||
kwparams: "**" typedparam ","?
|
||||
|
||||
?paramvalue: typedparam ("=" test)?
|
||||
?typedparam: name (":" test)?
|
||||
|
||||
|
||||
lambdef: "lambda" [lambda_params] ":" test
|
||||
lambdef_nocond: "lambda" [lambda_params] ":" test_nocond
|
||||
lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]]
|
||||
| lambda_starparams
|
||||
| lambda_kwparams
|
||||
?lambda_paramvalue: name ("=" test)?
|
||||
lambda_starparams: "*" [name] ("," lambda_paramvalue)* ["," [lambda_kwparams]]
|
||||
lambda_kwparams: "**" name ","?
|
||||
|
||||
|
||||
?stmt: simple_stmt | compound_stmt
|
||||
?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE
|
||||
?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr
|
||||
assign_stmt: annassign | augassign | assign
|
||||
|
||||
annassign: testlist_star_expr ":" test ["=" test]
|
||||
assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+
|
||||
augassign: testlist_star_expr augassign_op (yield_expr|testlist)
|
||||
!augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//="
|
||||
?testlist_star_expr: test_or_star_expr
|
||||
| test_or_star_expr ("," test_or_star_expr)+ ","? -> tuple
|
||||
| test_or_star_expr "," -> tuple
|
||||
|
||||
// For normal and annotated assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: "del" exprlist
|
||||
pass_stmt: "pass"
|
||||
?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: "break"
|
||||
continue_stmt: "continue"
|
||||
return_stmt: "return" [testlist]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: "raise" [test ["from" test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: "import" dotted_as_names
|
||||
// note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS
|
||||
import_from: "from" (dots? dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names)
|
||||
!dots: "."+
|
||||
import_as_name: name ["as" name]
|
||||
dotted_as_name: dotted_name ["as" name]
|
||||
import_as_names: import_as_name ("," import_as_name)* [","]
|
||||
dotted_as_names: dotted_as_name ("," dotted_as_name)*
|
||||
dotted_name: name ("." name)*
|
||||
global_stmt: "global" name ("," name)*
|
||||
nonlocal_stmt: "nonlocal" name ("," name)*
|
||||
assert_stmt: "assert" test ["," test]
|
||||
|
||||
?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | match_stmt
|
||||
| with_stmt | funcdef | classdef | decorated | async_stmt
|
||||
async_stmt: "async" (funcdef | with_stmt | for_stmt)
|
||||
if_stmt: "if" test ":" suite elifs ["else" ":" suite]
|
||||
elifs: elif_*
|
||||
elif_: "elif" test ":" suite
|
||||
while_stmt: "while" test ":" suite ["else" ":" suite]
|
||||
for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]
|
||||
try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally]
|
||||
| "try" ":" suite finally -> try_finally
|
||||
finally: "finally" ":" suite
|
||||
except_clauses: except_clause+
|
||||
except_clause: "except" [test ["as" name]] ":" suite
|
||||
// NB compile.c makes sure that the default except clause is last
|
||||
|
||||
|
||||
with_stmt: "with" with_items ":" suite
|
||||
with_items: with_item ("," with_item)*
|
||||
with_item: test ["as" name]
|
||||
|
||||
match_stmt: "match" test ":" _NEWLINE _INDENT case+ _DEDENT
|
||||
|
||||
case: "case" pattern ["if" test] ":" suite
|
||||
|
||||
?pattern: sequence_item_pattern "," _sequence_pattern -> sequence_pattern
|
||||
| as_pattern
|
||||
?as_pattern: or_pattern ("as" NAME)?
|
||||
?or_pattern: closed_pattern ("|" closed_pattern)*
|
||||
?closed_pattern: literal_pattern
|
||||
| NAME -> capture_pattern
|
||||
| "_" -> any_pattern
|
||||
| attr_pattern
|
||||
| "(" as_pattern ")"
|
||||
| "[" _sequence_pattern "]" -> sequence_pattern
|
||||
| "(" (sequence_item_pattern "," _sequence_pattern)? ")" -> sequence_pattern
|
||||
| "{" (mapping_item_pattern ("," mapping_item_pattern)* ","?)?"}" -> mapping_pattern
|
||||
| "{" (mapping_item_pattern ("," mapping_item_pattern)* ",")? "**" NAME ","? "}" -> mapping_star_pattern
|
||||
| class_pattern
|
||||
|
||||
literal_pattern: inner_literal_pattern
|
||||
|
||||
?inner_literal_pattern: "None" -> const_none
|
||||
| "True" -> const_true
|
||||
| "False" -> const_false
|
||||
| STRING -> string
|
||||
| number
|
||||
|
||||
attr_pattern: NAME ("." NAME)+ -> value
|
||||
|
||||
name_or_attr_pattern: NAME ("." NAME)* -> value
|
||||
|
||||
mapping_item_pattern: (literal_pattern|attr_pattern) ":" as_pattern
|
||||
|
||||
_sequence_pattern: (sequence_item_pattern ("," sequence_item_pattern)* ","?)?
|
||||
?sequence_item_pattern: as_pattern
|
||||
| "*" NAME -> star_pattern
|
||||
|
||||
class_pattern: name_or_attr_pattern "(" [arguments_pattern ","?] ")"
|
||||
arguments_pattern: pos_arg_pattern ["," keyws_arg_pattern]
|
||||
| keyws_arg_pattern -> no_pos_arguments
|
||||
|
||||
pos_arg_pattern: as_pattern ("," as_pattern)*
|
||||
keyws_arg_pattern: keyw_arg_pattern ("," keyw_arg_pattern)*
|
||||
keyw_arg_pattern: NAME "=" as_pattern
|
||||
|
||||
|
||||
|
||||
suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT
|
||||
|
||||
?test: or_test ("if" or_test "else" test)?
|
||||
| lambdef
|
||||
| assign_expr
|
||||
|
||||
assign_expr: name ":=" test
|
||||
|
||||
?test_nocond: or_test | lambdef_nocond
|
||||
|
||||
?or_test: and_test ("or" and_test)*
|
||||
?and_test: not_test_ ("and" not_test_)*
|
||||
?not_test_: "not" not_test_ -> not_test
|
||||
| comparison
|
||||
?comparison: expr (comp_op expr)*
|
||||
star_expr: "*" expr
|
||||
|
||||
?expr: or_expr
|
||||
?or_expr: xor_expr ("|" xor_expr)*
|
||||
?xor_expr: and_expr ("^" and_expr)*
|
||||
?and_expr: shift_expr ("&" shift_expr)*
|
||||
?shift_expr: arith_expr (_shift_op arith_expr)*
|
||||
?arith_expr: term (_add_op term)*
|
||||
?term: factor (_mul_op factor)*
|
||||
?factor: _unary_op factor | power
|
||||
|
||||
!_unary_op: "+"|"-"|"~"
|
||||
!_add_op: "+"|"-"
|
||||
!_shift_op: "<<"|">>"
|
||||
!_mul_op: "*"|"@"|"/"|"%"|"//"
|
||||
// <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
// sake of a __future__ import described in PEP 401 (which really works :-)
|
||||
!comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"
|
||||
|
||||
?power: await_expr ("**" factor)?
|
||||
?await_expr: AWAIT? atom_expr
|
||||
AWAIT: "await"
|
||||
|
||||
?atom_expr: atom_expr "(" [arguments] ")" -> funccall
|
||||
| atom_expr "[" subscriptlist "]" -> getitem
|
||||
| atom_expr "." name -> getattr
|
||||
| atom
|
||||
|
||||
?atom: "(" yield_expr ")"
|
||||
| "(" _tuple_inner? ")" -> tuple
|
||||
| "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension
|
||||
| "[" _exprlist? "]" -> list
|
||||
| "[" comprehension{test_or_star_expr} "]" -> list_comprehension
|
||||
| "{" _dict_exprlist? "}" -> dict
|
||||
| "{" comprehension{key_value} "}" -> dict_comprehension
|
||||
| "{" _exprlist "}" -> set
|
||||
| "{" comprehension{test} "}" -> set_comprehension
|
||||
| name -> var
|
||||
| number
|
||||
| string_concat
|
||||
| "(" test ")"
|
||||
| "..." -> ellipsis
|
||||
| "None" -> const_none
|
||||
| "True" -> const_true
|
||||
| "False" -> const_false
|
||||
|
||||
|
||||
?string_concat: string+
|
||||
|
||||
_tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",")
|
||||
|
||||
?test_or_star_expr: test
|
||||
| star_expr
|
||||
|
||||
?subscriptlist: subscript
|
||||
| subscript (("," subscript)+ [","] | ",") -> subscript_tuple
|
||||
?subscript: test | ([test] ":" [test] [sliceop]) -> slice
|
||||
sliceop: ":" [test]
|
||||
?exprlist: (expr|star_expr)
|
||||
| (expr|star_expr) (("," (expr|star_expr))+ [","]|",")
|
||||
?testlist: test | testlist_tuple
|
||||
testlist_tuple: test (("," test)+ [","] | ",")
|
||||
_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","]
|
||||
|
||||
key_value: test ":" test
|
||||
|
||||
_exprlist: test_or_star_expr ("," test_or_star_expr)* [","]
|
||||
|
||||
classdef: "class" name ["(" [arguments] ")"] ":" suite
|
||||
|
||||
|
||||
|
||||
arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])?
|
||||
| starargs
|
||||
| kwargs
|
||||
| comprehension{test}
|
||||
|
||||
starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs]
|
||||
stararg: "*" test
|
||||
kwargs: "**" test ("," argvalue)*
|
||||
|
||||
?argvalue: test ("=" test)?
|
||||
|
||||
|
||||
comprehension{comp_result}: comp_result comp_fors [comp_if]
|
||||
comp_fors: comp_for+
|
||||
comp_for: [ASYNC] "for" exprlist "in" or_test
|
||||
ASYNC: "async"
|
||||
?comp_if: "if" test_nocond
|
||||
|
||||
// not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: name
|
||||
|
||||
yield_expr: "yield" [testlist]
|
||||
| "yield" "from" test -> yield_from
|
||||
|
||||
number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER
|
||||
string: STRING | LONG_STRING
|
||||
|
||||
// Other terminals
|
||||
|
||||
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
|
||||
|
||||
%ignore /[\t \f]+/ // WS
|
||||
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
|
||||
%ignore COMMENT
|
||||
%declare _INDENT _DEDENT
|
||||
|
||||
|
||||
// Python terminals
|
||||
|
||||
!name: NAME | "match" | "case"
|
||||
NAME: /[^\W\d]\w*/
|
||||
COMMENT: /#[^\n]*/
|
||||
|
||||
STRING: /([ubf]?r?|r[ubf])("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
|
||||
LONG_STRING: /([ubf]?r?|r[ubf])(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is
|
||||
|
||||
_SPECIAL_DEC: "0".."9" ("_"? "0".."9" )*
|
||||
DEC_NUMBER: "1".."9" ("_"? "0".."9" )*
|
||||
| "0" ("_"? "0" )* /(?![1-9])/
|
||||
HEX_NUMBER.2: "0" ("x" | "X") ("_"? ("0".."9" | "a".."f" | "A".."F"))+
|
||||
OCT_NUMBER.2: "0" ("o" | "O") ("_"? "0".."7" )+
|
||||
BIN_NUMBER.2: "0" ("b" | "B") ("_"? "0".."1" )+
|
||||
|
||||
_EXP: ("e"|"E") ["+" | "-"] _SPECIAL_DEC
|
||||
DECIMAL: "." _SPECIAL_DEC | _SPECIAL_DEC "." _SPECIAL_DEC?
|
||||
FLOAT_NUMBER.2: _SPECIAL_DEC _EXP | DECIMAL _EXP?
|
||||
IMAG_NUMBER.2: (_SPECIAL_DEC | FLOAT_NUMBER) ("J" | "j")
|
||||
|
||||
|
||||
// Comma-separated list (with an optional trailing comma)
|
||||
cs_list{item}: item ("," item)* ","?
|
||||
_cs_list{item}: item ("," item)* ","?
|
||||
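This grammar is meant to be used together with the indentation postlexer in indenter.py, since it %declares _INDENT/_DEDENT. A usage sketch following the upstream examples; for the vendored copy the package name would be "ccxt.static_dependencies.lark" rather than "lark":

from lark import Lark
from lark.indenter import PythonIndenter

python_parser = Lark.open_from_package(
    "lark", "python.lark", ["grammars"],
    parser="lalr", postlex=PythonIndenter(), start="file_input",
)

tree = python_parser.parse("def add(a, b):\n    return a + b\n")
print(tree.pretty())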
7
ccxt/static_dependencies/lark/grammars/unicode.lark
Normal file
@@ -0,0 +1,7 @@
// TODO: LETTER, WORD, etc.

//
// Whitespace
//
WS_INLINE: /[ \t\xa0]/+
WS: /[ \t\xa0\f\r\n]/+
143
ccxt/static_dependencies/lark/indenter.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"Provides a post-lexer for implementing Python-style indentation."
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Iterator
|
||||
|
||||
from .exceptions import LarkError
|
||||
from .lark import PostLex
|
||||
from .lexer import Token
|
||||
|
||||
###{standalone
|
||||
|
||||
class DedentError(LarkError):
|
||||
pass
|
||||
|
||||
class Indenter(PostLex, ABC):
|
||||
"""This is a postlexer that "injects" indent/dedent tokens based on indentation.
|
||||
|
||||
It keeps track of the current indentation, as well as the current level of parentheses.
|
||||
Inside parentheses, the indentation is ignored, and no indent/dedent tokens get generated.
|
||||
|
||||
Note: This is an abstract class. To use it, inherit and implement all its abstract methods:
|
||||
- tab_len
|
||||
- NL_type
|
||||
- OPEN_PAREN_types, CLOSE_PAREN_types
|
||||
- INDENT_type, DEDENT_type
|
||||
|
||||
See also: the ``postlex`` option in `Lark`.
|
||||
"""
|
||||
paren_level: int
|
||||
indent_level: List[int]
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.paren_level = 0
|
||||
self.indent_level = [0]
|
||||
assert self.tab_len > 0
|
||||
|
||||
def handle_NL(self, token: Token) -> Iterator[Token]:
|
||||
if self.paren_level > 0:
|
||||
return
|
||||
|
||||
yield token
|
||||
|
||||
indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces
|
||||
indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
|
||||
|
||||
if indent > self.indent_level[-1]:
|
||||
self.indent_level.append(indent)
|
||||
yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
|
||||
else:
|
||||
while indent < self.indent_level[-1]:
|
||||
self.indent_level.pop()
|
||||
yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
|
||||
|
||||
if indent != self.indent_level[-1]:
|
||||
raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (indent, self.indent_level[-1]))
|
||||
|
||||
def _process(self, stream):
|
||||
for token in stream:
|
||||
if token.type == self.NL_type:
|
||||
yield from self.handle_NL(token)
|
||||
else:
|
||||
yield token
|
||||
|
||||
if token.type in self.OPEN_PAREN_types:
|
||||
self.paren_level += 1
|
||||
elif token.type in self.CLOSE_PAREN_types:
|
||||
self.paren_level -= 1
|
||||
assert self.paren_level >= 0
|
||||
|
||||
while len(self.indent_level) > 1:
|
||||
self.indent_level.pop()
|
||||
yield Token(self.DEDENT_type, '')
|
||||
|
||||
assert self.indent_level == [0], self.indent_level
|
||||
|
||||
def process(self, stream):
|
||||
self.paren_level = 0
|
||||
self.indent_level = [0]
|
||||
return self._process(stream)
|
||||
|
||||
# XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
|
||||
@property
|
||||
def always_accept(self):
|
||||
return (self.NL_type,)
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def NL_type(self) -> str:
|
||||
"The name of the newline token"
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def OPEN_PAREN_types(self) -> List[str]:
|
||||
"The names of the tokens that open a parenthesis"
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def CLOSE_PAREN_types(self) -> List[str]:
|
||||
"""The names of the tokens that close a parenthesis
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def INDENT_type(self) -> str:
|
||||
"""The name of the token that starts an indentation in the grammar.
|
||||
|
||||
See also: %declare
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def DEDENT_type(self) -> str:
|
||||
"""The name of the token that end an indentation in the grammar.
|
||||
|
||||
See also: %declare
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def tab_len(self) -> int:
|
||||
"""How many spaces does a tab equal"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class PythonIndenter(Indenter):
|
||||
"""A postlexer that "injects" _INDENT/_DEDENT tokens based on indentation, according to the Python syntax.
|
||||
|
||||
See also: the ``postlex`` option in `Lark`.
|
||||
"""
|
||||
|
||||
NL_type = '_NEWLINE'
|
||||
OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
|
||||
CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
|
||||
INDENT_type = '_INDENT'
|
||||
DEDENT_type = '_DEDENT'
|
||||
tab_len = 8
|
||||
|
||||
###}
|
||||
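A custom subclass only needs to fill in the abstract attributes. The sketch below follows upstream lark's examples/indented_tree.py; the toy grammar and sample input are illustrative:

from lark import Lark
from lark.indenter import Indenter

tree_grammar = r"""
    ?start: _NL* tree
    tree: NAME _NL [_INDENT tree+ _DEDENT]

    %import common.CNAME -> NAME
    %import common.WS_INLINE
    %declare _INDENT _DEDENT
    %ignore WS_INLINE

    _NL: /(\r?\n[\t ]*)+/
"""

class TreeIndenter(Indenter):
    NL_type = '_NL'
    OPEN_PAREN_types = []
    CLOSE_PAREN_types = []
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'
    tab_len = 8

parser = Lark(tree_grammar, parser='lalr', postlex=TreeIndenter())

sample = """
a
    b
    c
        d
    e
"""

print(parser.parse(sample).pretty())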
658
ccxt/static_dependencies/lark/lark.py
Normal file
@@ -0,0 +1,658 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import getpass
|
||||
import sys, os, pickle
|
||||
import tempfile
|
||||
import types
|
||||
import re
|
||||
from typing import (
|
||||
TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, Sequence,
|
||||
Tuple, Iterable, IO, Any, TYPE_CHECKING, Collection
|
||||
)
|
||||
if TYPE_CHECKING:
|
||||
from .parsers.lalr_interactive_parser import InteractiveParser
|
||||
from .tree import ParseTree
|
||||
from .visitors import Transformer
|
||||
from typing import Literal
|
||||
from .parser_frontends import ParsingFrontend
|
||||
|
||||
from .exceptions import ConfigurationError, assert_config, UnexpectedInput
|
||||
from .utils import Serialize, SerializeMemoizer, FS, logger
|
||||
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest
|
||||
from .tree import Tree
|
||||
from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
|
||||
|
||||
from .lexer import Lexer, BasicLexer, TerminalDef, LexerThread, Token
|
||||
from .parse_tree_builder import ParseTreeBuilder
|
||||
from .parser_frontends import _validate_frontend_args, _get_lexer_callbacks, _deserialize_parsing_frontend, _construct_parsing_frontend
|
||||
from .grammar import Rule
|
||||
|
||||
|
||||
try:
|
||||
import regex
|
||||
_has_regex = True
|
||||
except ImportError:
|
||||
_has_regex = False
|
||||
|
||||
|
||||
###{standalone
|
||||
|
||||
|
||||
class PostLex(ABC):
|
||||
@abstractmethod
|
||||
def process(self, stream: Iterator[Token]) -> Iterator[Token]:
|
||||
return stream
|
||||
|
||||
always_accept: Iterable[str] = ()
|
||||
|
||||
class LarkOptions(Serialize):
|
||||
"""Specifies the options for Lark
|
||||
|
||||
"""
|
||||
|
||||
start: List[str]
|
||||
debug: bool
|
||||
strict: bool
|
||||
transformer: 'Optional[Transformer]'
|
||||
propagate_positions: Union[bool, str]
|
||||
maybe_placeholders: bool
|
||||
cache: Union[bool, str]
|
||||
regex: bool
|
||||
g_regex_flags: int
|
||||
keep_all_tokens: bool
|
||||
tree_class: Optional[Callable[[str, List], Any]]
|
||||
parser: _ParserArgType
|
||||
lexer: _LexerArgType
|
||||
ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
|
||||
postlex: Optional[PostLex]
|
||||
priority: 'Optional[Literal["auto", "normal", "invert"]]'
|
||||
lexer_callbacks: Dict[str, Callable[[Token], Token]]
|
||||
use_bytes: bool
|
||||
ordered_sets: bool
|
||||
edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
|
||||
import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
|
||||
source_path: Optional[str]
|
||||
|
||||
OPTIONS_DOC = r"""
|
||||
**=== General Options ===**
|
||||
|
||||
start
|
||||
The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
|
||||
debug
|
||||
Display debug information and extra warnings. Use only when debugging (Default: ``False``)
|
||||
When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
|
||||
strict
|
||||
Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions.
|
||||
transformer
|
||||
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
|
||||
propagate_positions
|
||||
Propagates positional attributes into the 'meta' attribute of all tree branches.
|
||||
Sets attributes: (line, column, end_line, end_column, start_pos, end_pos,
|
||||
container_line, container_column, container_end_line, container_end_column)
|
||||
Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
|
||||
maybe_placeholders
|
||||
When ``True``, the ``[]`` operator returns ``None`` when not matched.
|
||||
When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
|
||||
(default= ``True``)
|
||||
cache
|
||||
Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now.
|
||||
|
||||
- When ``False``, does nothing (default)
|
||||
- When ``True``, caches to a temporary file in the local directory
|
||||
- When given a string, caches to the path pointed by the string
|
||||
regex
|
||||
When True, uses the ``regex`` module instead of the stdlib ``re``.
|
||||
g_regex_flags
|
||||
Flags that are applied to all terminals (both regex and strings)
|
||||
keep_all_tokens
|
||||
Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``)
|
||||
tree_class
|
||||
Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.
|
||||
|
||||
**=== Algorithm Options ===**
|
||||
|
||||
parser
|
||||
Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
|
||||
(there is also a "cyk" option for legacy)
|
||||
lexer
|
||||
Decides whether or not to use a lexer stage
|
||||
|
||||
- "auto" (default): Choose for me based on the parser
|
||||
- "basic": Use a basic lexer
|
||||
- "contextual": Stronger lexer (only works with parser="lalr")
|
||||
- "dynamic": Flexible and powerful (only with parser="earley")
|
||||
- "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
|
||||
ambiguity
|
||||
Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
|
||||
|
||||
- "resolve": The parser will automatically choose the simplest derivation
|
||||
(it chooses consistently: greedy for tokens, non-greedy for rules)
|
||||
- "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
|
||||
- "forest": The parser will return the root of the shared packed parse forest.
|
||||
|
||||
**=== Misc. / Domain Specific Options ===**
|
||||
|
||||
postlex
|
||||
Lexer post-processing (Default: ``None``). Only works with the basic and contextual lexers.
|
||||
priority
|
||||
How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto")
|
||||
lexer_callbacks
|
||||
Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
|
||||
use_bytes
|
||||
Accept an input of type ``bytes`` instead of ``str``.
|
||||
ordered_sets
|
||||
Should Earley use ordered-sets to achieve stable output (~10% slower than regular sets; Default: ``True``)
|
||||
edit_terminals
|
||||
A callback for editing the terminals before parse.
|
||||
import_paths
|
||||
A List of either paths or loader functions to specify from where grammars are imported
|
||||
source_path
|
||||
Override the source from which the grammar was loaded. Useful for relative imports and unconventional grammar loading
|
||||
**=== End of Options ===**
|
||||
"""
|
||||
if __doc__:
|
||||
__doc__ += OPTIONS_DOC
|
||||
|
||||
|
||||
# Adding a new option needs to be done in multiple places:
|
||||
# - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
|
||||
# - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
|
||||
# - As an attribute of `LarkOptions` above
|
||||
# - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
|
||||
# - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
|
||||
_defaults: Dict[str, Any] = {
|
||||
'debug': False,
|
||||
'strict': False,
|
||||
'keep_all_tokens': False,
|
||||
'tree_class': None,
|
||||
'cache': False,
|
||||
'postlex': None,
|
||||
'parser': 'earley',
|
||||
'lexer': 'auto',
|
||||
'transformer': None,
|
||||
'start': 'start',
|
||||
'priority': 'auto',
|
||||
'ambiguity': 'auto',
|
||||
'regex': False,
|
||||
'propagate_positions': False,
|
||||
'lexer_callbacks': {},
|
||||
'maybe_placeholders': True,
|
||||
'edit_terminals': None,
|
||||
'g_regex_flags': 0,
|
||||
'use_bytes': False,
|
||||
'ordered_sets': True,
|
||||
'import_paths': [],
|
||||
'source_path': None,
|
||||
'_plugins': {},
|
||||
}
|
||||
|
||||
def __init__(self, options_dict: Dict[str, Any]) -> None:
|
||||
o = dict(options_dict)
|
||||
|
||||
options = {}
|
||||
for name, default in self._defaults.items():
|
||||
if name in o:
|
||||
value = o.pop(name)
|
||||
if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'):
|
||||
value = bool(value)
|
||||
else:
|
||||
value = default
|
||||
|
||||
options[name] = value
|
||||
|
||||
if isinstance(options['start'], str):
|
||||
options['start'] = [options['start']]
|
||||
|
||||
self.__dict__['options'] = options
|
||||
|
||||
|
||||
assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
|
||||
|
||||
if self.parser == 'earley' and self.transformer:
|
||||
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
|
||||
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
|
||||
|
||||
if o:
|
||||
raise ConfigurationError("Unknown options: %s" % o.keys())
|
||||
|
||||
def __getattr__(self, name: str) -> Any:
|
||||
try:
|
||||
return self.__dict__['options'][name]
|
||||
except KeyError as e:
|
||||
raise AttributeError(e)
|
||||
|
||||
def __setattr__(self, name: str, value: str) -> None:
|
||||
assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
|
||||
self.options[name] = value
|
||||
|
||||
def serialize(self, memo = None) -> Dict[str, Any]:
|
||||
return self.options
|
||||
|
||||
@classmethod
|
||||
def deserialize(cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]) -> "LarkOptions":
|
||||
return cls(data)
|
||||
|
||||
|
||||
# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
|
||||
# These options are only used outside of `load_grammar`.
|
||||
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class', '_plugins'}
|
||||
|
||||
_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
|
||||
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
|
||||
|
||||
|
||||
_T = TypeVar('_T', bound="Lark")
|
||||
|
||||
class Lark(Serialize):
|
||||
"""Main interface for the library.
|
||||
|
||||
It's mostly a thin wrapper for the many different parsers, and for the tree constructor.
|
||||
|
||||
Parameters:
|
||||
grammar: a string or file-object containing the grammar spec (using Lark's ebnf syntax)
|
||||
options: a dictionary controlling various aspects of Lark.
|
||||
|
||||
Example:
|
||||
>>> Lark(r'''start: "foo" ''')
|
||||
Lark(...)
|
||||
"""
|
||||
|
||||
source_path: str
|
||||
source_grammar: str
|
||||
grammar: 'Grammar'
|
||||
options: LarkOptions
|
||||
lexer: Lexer
|
||||
parser: 'ParsingFrontend'
|
||||
terminals: Collection[TerminalDef]
|
||||
|
||||
def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
|
||||
self.options = LarkOptions(options)
|
||||
re_module: types.ModuleType
|
||||
|
||||
# Set regex or re module
|
||||
use_regex = self.options.regex
|
||||
if use_regex:
|
||||
if _has_regex:
|
||||
re_module = regex
|
||||
else:
|
||||
raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.')
|
||||
else:
|
||||
re_module = re
|
||||
|
||||
# Some, but not all file-like objects have a 'name' attribute
|
||||
if self.options.source_path is None:
|
||||
try:
|
||||
self.source_path = grammar.name # type: ignore[union-attr]
|
||||
except AttributeError:
|
||||
self.source_path = '<string>'
|
||||
else:
|
||||
self.source_path = self.options.source_path
|
||||
|
||||
# Drain file-like objects to get their contents
|
||||
try:
|
||||
read = grammar.read # type: ignore[union-attr]
|
||||
except AttributeError:
|
||||
pass
|
||||
else:
|
||||
grammar = read()
|
||||
|
||||
cache_fn = None
|
||||
cache_sha256 = None
|
||||
if isinstance(grammar, str):
|
||||
self.source_grammar = grammar
|
||||
if self.options.use_bytes:
|
||||
if not grammar.isascii():
|
||||
raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
|
||||
|
||||
if self.options.cache:
|
||||
if self.options.parser != 'lalr':
|
||||
raise ConfigurationError("cache only works with parser='lalr' for now")
|
||||
|
||||
unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals', '_plugins')
|
||||
options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
|
||||
from . import __version__
|
||||
s = grammar + options_str + __version__ + str(sys.version_info[:2])
|
||||
cache_sha256 = sha256_digest(s)
|
||||
|
||||
if isinstance(self.options.cache, str):
|
||||
cache_fn = self.options.cache
|
||||
else:
|
||||
if self.options.cache is not True:
|
||||
raise ConfigurationError("cache argument must be bool or str")
|
||||
|
||||
try:
|
||||
username = getpass.getuser()
|
||||
except Exception:
|
||||
# The exception raised may be ImportError or OSError in
|
||||
# the future. For the cache, we don't care about the
|
||||
# specific reason - we just want a username.
|
||||
username = "unknown"
|
||||
|
||||
cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_sha256, *sys.version_info[:2])
|
||||
|
||||
old_options = self.options
|
||||
try:
|
||||
with FS.open(cache_fn, 'rb') as f:
|
||||
logger.debug('Loading grammar from cache: %s', cache_fn)
|
||||
# Remove options that aren't relevant for loading from cache
|
||||
for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
|
||||
del options[name]
|
||||
file_sha256 = f.readline().rstrip(b'\n')
|
||||
cached_used_files = pickle.load(f)
|
||||
if file_sha256 == cache_sha256.encode('utf8') and verify_used_files(cached_used_files):
|
||||
cached_parser_data = pickle.load(f)
|
||||
self._load(cached_parser_data, **options)
|
||||
return
|
||||
except FileNotFoundError:
|
||||
# The cache file doesn't exist; parse and compose the grammar as normal
|
||||
pass
|
||||
except Exception: # We should probably narrow down which errors we catch here.
|
||||
logger.exception("Failed to load Lark from cache: %r. We will try to carry on.", cache_fn)
|
||||
|
||||
# In theory, the Lark instance might have been messed up by the call to `_load`.
|
||||
# In practice the only relevant thing that might have been overwritten should be `options`
|
||||
self.options = old_options
|
||||
|
||||
|
||||
# Parse the grammar file and compose the grammars
|
||||
self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
|
||||
else:
|
||||
assert isinstance(grammar, Grammar)
|
||||
self.grammar = grammar
|
||||
|
||||
|
||||
if self.options.lexer == 'auto':
|
||||
if self.options.parser == 'lalr':
|
||||
self.options.lexer = 'contextual'
|
||||
elif self.options.parser == 'earley':
|
||||
if self.options.postlex is not None:
|
||||
logger.info("postlex can't be used with the dynamic lexer, so we use 'basic' instead. "
|
||||
"Consider using lalr with contextual instead of earley")
|
||||
self.options.lexer = 'basic'
|
||||
else:
|
||||
self.options.lexer = 'dynamic'
|
||||
elif self.options.parser == 'cyk':
|
||||
self.options.lexer = 'basic'
|
||||
else:
|
||||
assert False, self.options.parser
|
||||
lexer = self.options.lexer
|
||||
if isinstance(lexer, type):
|
||||
assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance
|
||||
else:
|
||||
assert_config(lexer, ('basic', 'contextual', 'dynamic', 'dynamic_complete'))
|
||||
if self.options.postlex is not None and 'dynamic' in lexer:
|
||||
raise ConfigurationError("Can't use postlex with a dynamic lexer. Use basic or contextual instead")
|
||||
|
||||
if self.options.ambiguity == 'auto':
|
||||
if self.options.parser == 'earley':
|
||||
self.options.ambiguity = 'resolve'
|
||||
else:
|
||||
assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
|
||||
|
||||
if self.options.priority == 'auto':
|
||||
self.options.priority = 'normal'
|
||||
|
||||
if self.options.priority not in _VALID_PRIORITY_OPTIONS:
|
||||
raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
|
||||
if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
|
||||
raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
|
||||
|
||||
if self.options.parser is None:
|
||||
terminals_to_keep = '*'
|
||||
elif self.options.postlex is not None:
|
||||
terminals_to_keep = set(self.options.postlex.always_accept)
|
||||
else:
|
||||
terminals_to_keep = set()
|
||||
|
||||
# Compile the EBNF grammar into BNF
|
||||
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)
|
||||
|
||||
if self.options.edit_terminals:
|
||||
for t in self.terminals:
|
||||
self.options.edit_terminals(t)
|
||||
|
||||
self._terminals_dict = {t.name: t for t in self.terminals}
|
||||
|
||||
# If the user asked to invert the priorities, negate them all here.
|
||||
if self.options.priority == 'invert':
|
||||
for rule in self.rules:
|
||||
if rule.options.priority is not None:
|
||||
rule.options.priority = -rule.options.priority
|
||||
for term in self.terminals:
|
||||
term.priority = -term.priority
|
||||
# Else, if the user asked to disable priorities, strip them from the
|
||||
# rules and terminals. This allows the Earley parsers to skip an extra forest walk
|
||||
# for improved performance, if you don't need them (or didn't specify any).
|
||||
elif self.options.priority is None:
|
||||
for rule in self.rules:
|
||||
if rule.options.priority is not None:
|
||||
rule.options.priority = None
|
||||
for term in self.terminals:
|
||||
term.priority = 0
|
||||
|
||||
# TODO Deprecate lexer_callbacks?
|
||||
self.lexer_conf = LexerConf(
|
||||
self.terminals, re_module, self.ignore_tokens, self.options.postlex,
|
||||
self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes, strict=self.options.strict
|
||||
)
|
||||
|
||||
if self.options.parser:
|
||||
self.parser = self._build_parser()
|
||||
elif lexer:
|
||||
self.lexer = self._build_lexer()
|
||||
|
||||
if cache_fn:
|
||||
logger.debug('Saving grammar to cache: %s', cache_fn)
|
||||
try:
|
||||
with FS.open(cache_fn, 'wb') as f:
|
||||
assert cache_sha256 is not None
|
||||
f.write(cache_sha256.encode('utf8') + b'\n')
|
||||
pickle.dump(used_files, f)
|
||||
self.save(f, _LOAD_ALLOWED_OPTIONS)
|
||||
except IOError as e:
|
||||
logger.exception("Failed to save Lark to cache: %r.", cache_fn, e)
|
||||
|
||||
if __doc__:
|
||||
__doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
|
||||
|
||||
__serialize_fields__ = 'parser', 'rules', 'options'
|
||||
|
||||
def _build_lexer(self, dont_ignore: bool=False) -> BasicLexer:
|
||||
lexer_conf = self.lexer_conf
|
||||
if dont_ignore:
|
||||
from copy import copy
|
||||
lexer_conf = copy(lexer_conf)
|
||||
lexer_conf.ignore = ()
|
||||
return BasicLexer(lexer_conf)
|
||||
|
||||
def _prepare_callbacks(self) -> None:
|
||||
self._callbacks = {}
|
||||
# we don't need these callbacks if we aren't building a tree
|
||||
if self.options.ambiguity != 'forest':
|
||||
self._parse_tree_builder = ParseTreeBuilder(
|
||||
self.rules,
|
||||
self.options.tree_class or Tree,
|
||||
self.options.propagate_positions,
|
||||
self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
|
||||
self.options.maybe_placeholders
|
||||
)
|
||||
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
|
||||
self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))
|
||||
|
||||
def _build_parser(self) -> "ParsingFrontend":
|
||||
self._prepare_callbacks()
|
||||
_validate_frontend_args(self.options.parser, self.options.lexer)
|
||||
parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
|
||||
return _construct_parsing_frontend(
|
||||
self.options.parser,
|
||||
self.options.lexer,
|
||||
self.lexer_conf,
|
||||
parser_conf,
|
||||
options=self.options
|
||||
)
|
||||
|
||||
def save(self, f, exclude_options: Collection[str] = ()) -> None:
|
||||
"""Saves the instance into the given file object
|
||||
|
||||
Useful for caching and multiprocessing.
|
||||
"""
|
||||
if self.options.parser != 'lalr':
|
||||
raise NotImplementedError("Lark.save() is only implemented for the LALR(1) parser.")
|
||||
data, m = self.memo_serialize([TerminalDef, Rule])
|
||||
if exclude_options:
|
||||
data["options"] = {n: v for n, v in data["options"].items() if n not in exclude_options}
|
||||
pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
@classmethod
|
||||
def load(cls: Type[_T], f) -> _T:
|
||||
"""Loads an instance from the given file object
|
||||
|
||||
Useful for caching and multiprocessing.
|
||||
"""
|
||||
inst = cls.__new__(cls)
|
||||
return inst._load(f)
|
||||
|
||||
def _deserialize_lexer_conf(self, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]], options: LarkOptions) -> LexerConf:
|
||||
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
|
||||
lexer_conf.callbacks = options.lexer_callbacks or {}
|
||||
lexer_conf.re_module = regex if options.regex else re
|
||||
lexer_conf.use_bytes = options.use_bytes
|
||||
lexer_conf.g_regex_flags = options.g_regex_flags
|
||||
lexer_conf.skip_validation = True
|
||||
lexer_conf.postlex = options.postlex
|
||||
return lexer_conf
|
||||
|
||||
def _load(self: _T, f: Any, **kwargs) -> _T:
|
||||
if isinstance(f, dict):
|
||||
d = f
|
||||
else:
|
||||
d = pickle.load(f)
|
||||
memo_json = d['memo']
|
||||
data = d['data']
|
||||
|
||||
assert memo_json
|
||||
memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
|
||||
options = dict(data['options'])
|
||||
if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
|
||||
raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
|
||||
.format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
|
||||
options.update(kwargs)
|
||||
self.options = LarkOptions.deserialize(options, memo)
|
||||
self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
|
||||
self.source_path = '<deserialized>'
|
||||
_validate_frontend_args(self.options.parser, self.options.lexer)
|
||||
self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options)
|
||||
self.terminals = self.lexer_conf.terminals
|
||||
self._prepare_callbacks()
|
||||
self._terminals_dict = {t.name: t for t in self.terminals}
|
||||
self.parser = _deserialize_parsing_frontend(
|
||||
data['parser'],
|
||||
memo,
|
||||
self.lexer_conf,
|
||||
self._callbacks,
|
||||
self.options, # Not all, but multiple attributes are used
|
||||
)
|
||||
return self
|
||||
|
||||
@classmethod
|
||||
def _load_from_dict(cls, data, memo, **kwargs):
|
||||
inst = cls.__new__(cls)
|
||||
return inst._load({'data': data, 'memo': memo}, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
|
||||
"""Create an instance of Lark with the grammar given by its filename
|
||||
|
||||
If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
|
||||
|
||||
Example:
|
||||
|
||||
>>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
|
||||
Lark(...)
|
||||
|
||||
"""
|
||||
if rel_to:
|
||||
basepath = os.path.dirname(rel_to)
|
||||
grammar_filename = os.path.join(basepath, grammar_filename)
|
||||
with open(grammar_filename, encoding='utf8') as f:
|
||||
return cls(f, **options)
|
||||
|
||||
@classmethod
|
||||
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: 'Sequence[str]'=[""], **options) -> _T:
|
||||
"""Create an instance of Lark with the grammar loaded from within the package `package`.
|
||||
This allows grammar loading from zipapps.
|
||||
|
||||
Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`
|
||||
|
||||
Example:
|
||||
|
||||
Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...)
|
||||
"""
|
||||
package_loader = FromPackageLoader(package, search_paths)
|
||||
full_path, text = package_loader(None, grammar_path)
|
||||
options.setdefault('source_path', full_path)
|
||||
options.setdefault('import_paths', [])
|
||||
options['import_paths'].append(package_loader)
|
||||
return cls(text, **options)
|
||||
|
||||
def __repr__(self):
|
||||
return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
|
||||
|
||||
|
||||
def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
|
||||
"""Only lex (and postlex) the text, without parsing it. Only relevant when lexer='basic'
|
||||
|
||||
When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
|
||||
|
||||
:raises UnexpectedCharacters: In case the lexer cannot find a suitable match.
|
||||
"""
|
||||
lexer: Lexer
|
||||
if not hasattr(self, 'lexer') or dont_ignore:
|
||||
lexer = self._build_lexer(dont_ignore)
|
||||
else:
|
||||
lexer = self.lexer
|
||||
lexer_thread = LexerThread.from_text(lexer, text)
|
||||
stream = lexer_thread.lex(None)
|
||||
if self.options.postlex:
|
||||
return self.options.postlex.process(stream)
|
||||
return stream
|
||||
|
||||
def get_terminal(self, name: str) -> TerminalDef:
|
||||
"""Get information about a terminal"""
|
||||
return self._terminals_dict[name]
|
||||
|
||||
def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
|
||||
"""Start an interactive parsing session.
|
||||
|
||||
Parameters:
|
||||
text (str, optional): Text to be parsed. Required for ``resume_parse()``.
|
||||
start (str, optional): Start symbol
|
||||
|
||||
Returns:
|
||||
A new InteractiveParser instance.
|
||||
|
||||
See Also: ``Lark.parse()``
|
||||
"""
|
||||
return self.parser.parse_interactive(text, start=start)
|
||||
|
||||
def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> 'ParseTree':
|
||||
"""Parse the given text, according to the options provided.
|
||||
|
||||
Parameters:
|
||||
text (str): Text to be parsed.
|
||||
start (str, optional): Required if Lark was given multiple possible start symbols (using the start option).
|
||||
on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing.
|
||||
LALR only. See examples/advanced/error_handling.py for an example of how to use on_error.
|
||||
|
||||
Returns:
|
||||
If a transformer is supplied to ``__init__``, returns whatever is the
|
||||
result of the transformation. Otherwise, returns a Tree instance.
|
||||
|
||||
:raises UnexpectedInput: On a parse error, one of these sub-exceptions will be raised:
``UnexpectedCharacters``, ``UnexpectedToken``, or ``UnexpectedEOF``.
For convenience, these sub-exceptions also inherit from ``ParseError`` and ``LexError``.
|
||||
|
||||
"""
|
||||
return self.parser.parse(text, start=start, on_error=on_error)
|
||||
|
||||
|
||||
###}
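
# A minimal usage sketch for the Lark API above (illustration only, not part of the vendored
# file). It assumes the vendored package is importable as `ccxt.static_dependencies.lark`;
# the grammar and variable names are made up for the example.
from io import BytesIO
from ccxt.static_dependencies.lark import Lark

_demo_parser = Lark(r'''
    start: NAME "=" NUMBER
    NAME: /[a-z]+/
    NUMBER: /\d+/
    %ignore " "
''', parser='lalr')

_tree = _demo_parser.parse("answer = 42")   # returns a Tree unless a transformer was supplied
print(_tree.pretty())

_buf = BytesIO()
_demo_parser.save(_buf)                     # Lark.save()/load() are only implemented for parser='lalr'
_buf.seek(0)
_restored = Lark.load(_buf)
print(_restored.parse("answer = 42"))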
|
||||
678
ccxt/static_dependencies/lark/lexer.py
Normal file
678
ccxt/static_dependencies/lark/lexer.py
Normal file
@@ -0,0 +1,678 @@
|
||||
# Lexer Implementation
|
||||
|
||||
from abc import abstractmethod, ABC
|
||||
import re
|
||||
from contextlib import suppress
|
||||
from typing import (
|
||||
TypeVar, Type, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
|
||||
ClassVar, TYPE_CHECKING, overload
|
||||
)
|
||||
from types import ModuleType
|
||||
import warnings
|
||||
try:
|
||||
import interegular
|
||||
except ImportError:
|
||||
pass
|
||||
if TYPE_CHECKING:
|
||||
from .common import LexerConf
|
||||
from .parsers.lalr_parser_state import ParserState
|
||||
|
||||
from .utils import classify, get_regexp_width, Serialize, logger
|
||||
from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
|
||||
from .grammar import TOKEN_DEFAULT_PRIORITY
|
||||
|
||||
|
||||
###{standalone
|
||||
from copy import copy
|
||||
|
||||
try: # For the standalone parser, we need to make sure that has_interegular is False to avoid NameErrors later on
|
||||
has_interegular = bool(interegular)
|
||||
except NameError:
|
||||
has_interegular = False
|
||||
|
||||
class Pattern(Serialize, ABC):
|
||||
"An abstraction over regular expressions."
|
||||
|
||||
value: str
|
||||
flags: Collection[str]
|
||||
raw: Optional[str]
|
||||
type: ClassVar[str]
|
||||
|
||||
def __init__(self, value: str, flags: Collection[str] = (), raw: Optional[str] = None) -> None:
|
||||
self.value = value
|
||||
self.flags = frozenset(flags)
|
||||
self.raw = raw
|
||||
|
||||
def __repr__(self):
|
||||
return repr(self.to_regexp())
|
||||
|
||||
# Pattern Hashing assumes all subclasses have a different priority!
|
||||
def __hash__(self):
|
||||
return hash((type(self), self.value, self.flags))
|
||||
|
||||
def __eq__(self, other):
|
||||
return type(self) == type(other) and self.value == other.value and self.flags == other.flags
|
||||
|
||||
@abstractmethod
|
||||
def to_regexp(self) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def min_width(self) -> int:
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def max_width(self) -> int:
|
||||
raise NotImplementedError()
|
||||
|
||||
def _get_flags(self, value):
|
||||
for f in self.flags:
|
||||
value = ('(?%s:%s)' % (f, value))
|
||||
return value
|
||||
|
||||
|
||||
class PatternStr(Pattern):
|
||||
__serialize_fields__ = 'value', 'flags', 'raw'
|
||||
|
||||
type: ClassVar[str] = "str"
|
||||
|
||||
def to_regexp(self) -> str:
|
||||
return self._get_flags(re.escape(self.value))
|
||||
|
||||
@property
|
||||
def min_width(self) -> int:
|
||||
return len(self.value)
|
||||
|
||||
@property
|
||||
def max_width(self) -> int:
|
||||
return len(self.value)
|
||||
|
||||
|
||||
class PatternRE(Pattern):
|
||||
__serialize_fields__ = 'value', 'flags', 'raw', '_width'
|
||||
|
||||
type: ClassVar[str] = "re"
|
||||
|
||||
def to_regexp(self) -> str:
|
||||
return self._get_flags(self.value)
|
||||
|
||||
_width = None
|
||||
def _get_width(self):
|
||||
if self._width is None:
|
||||
self._width = get_regexp_width(self.to_regexp())
|
||||
return self._width
|
||||
|
||||
@property
|
||||
def min_width(self) -> int:
|
||||
return self._get_width()[0]
|
||||
|
||||
@property
|
||||
def max_width(self) -> int:
|
||||
return self._get_width()[1]
|
||||
|
||||
|
||||
class TerminalDef(Serialize):
|
||||
"A definition of a terminal"
|
||||
__serialize_fields__ = 'name', 'pattern', 'priority'
|
||||
__serialize_namespace__ = PatternStr, PatternRE
|
||||
|
||||
name: str
|
||||
pattern: Pattern
|
||||
priority: int
|
||||
|
||||
def __init__(self, name: str, pattern: Pattern, priority: int = TOKEN_DEFAULT_PRIORITY) -> None:
|
||||
assert isinstance(pattern, Pattern), pattern
|
||||
self.name = name
|
||||
self.pattern = pattern
|
||||
self.priority = priority
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
|
||||
|
||||
def user_repr(self) -> str:
|
||||
if self.name.startswith('__'): # We represent a generated terminal
|
||||
return self.pattern.raw or self.name
|
||||
else:
|
||||
return self.name
|
||||
|
||||
_T = TypeVar('_T', bound="Token")
|
||||
|
||||
class Token(str):
|
||||
"""A string with meta-information, that is produced by the lexer.
|
||||
|
||||
When parsing text, the resulting chunks of the input that haven't been discarded,
|
||||
will end up in the tree as Token instances. The Token class inherits from Python's ``str``,
|
||||
so normal string comparisons and operations will work as expected.
|
||||
|
||||
Attributes:
|
||||
type: Name of the token (as specified in grammar)
|
||||
value: Value of the token (redundant, as ``token.value == token`` will always be true)
|
||||
start_pos: The index of the token in the text
|
||||
line: The line of the token in the text (starting with 1)
|
||||
column: The column of the token in the text (starting with 1)
|
||||
end_line: The line where the token ends
|
||||
end_column: The next column after the end of the token. For example,
|
||||
if the token is a single character with a column value of 4,
|
||||
end_column will be 5.
|
||||
end_pos: the index where the token ends (basically ``start_pos + len(token)``)
|
||||
"""
|
||||
__slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
|
||||
|
||||
__match_args__ = ('type', 'value')
|
||||
|
||||
type: str
|
||||
start_pos: Optional[int]
|
||||
value: Any
|
||||
line: Optional[int]
|
||||
column: Optional[int]
|
||||
end_line: Optional[int]
|
||||
end_column: Optional[int]
|
||||
end_pos: Optional[int]
|
||||
|
||||
|
||||
@overload
|
||||
def __new__(
|
||||
cls,
|
||||
type: str,
|
||||
value: Any,
|
||||
start_pos: Optional[int] = None,
|
||||
line: Optional[int] = None,
|
||||
column: Optional[int] = None,
|
||||
end_line: Optional[int] = None,
|
||||
end_column: Optional[int] = None,
|
||||
end_pos: Optional[int] = None
|
||||
) -> 'Token':
|
||||
...
|
||||
|
||||
@overload
|
||||
def __new__(
|
||||
cls,
|
||||
type_: str,
|
||||
value: Any,
|
||||
start_pos: Optional[int] = None,
|
||||
line: Optional[int] = None,
|
||||
column: Optional[int] = None,
|
||||
end_line: Optional[int] = None,
|
||||
end_column: Optional[int] = None,
|
||||
end_pos: Optional[int] = None
|
||||
) -> 'Token': ...
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
if "type_" in kwargs:
|
||||
warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning)
|
||||
|
||||
if "type" in kwargs:
|
||||
raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.")
|
||||
kwargs["type"] = kwargs.pop("type_")
|
||||
|
||||
return cls._future_new(*args, **kwargs)
|
||||
|
||||
|
||||
@classmethod
|
||||
def _future_new(cls, type, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
|
||||
inst = super(Token, cls).__new__(cls, value)
|
||||
|
||||
inst.type = type
|
||||
inst.start_pos = start_pos
|
||||
inst.value = value
|
||||
inst.line = line
|
||||
inst.column = column
|
||||
inst.end_line = end_line
|
||||
inst.end_column = end_column
|
||||
inst.end_pos = end_pos
|
||||
return inst
|
||||
|
||||
@overload
|
||||
def update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token':
|
||||
...
|
||||
|
||||
@overload
|
||||
def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> 'Token':
|
||||
...
|
||||
|
||||
def update(self, *args, **kwargs):
|
||||
if "type_" in kwargs:
|
||||
warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning)
|
||||
|
||||
if "type" in kwargs:
|
||||
raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.")
|
||||
kwargs["type"] = kwargs.pop("type_")
|
||||
|
||||
return self._future_update(*args, **kwargs)
|
||||
|
||||
def _future_update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token':
|
||||
return Token.new_borrow_pos(
|
||||
type if type is not None else self.type,
|
||||
value if value is not None else self.value,
|
||||
self
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
|
||||
return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
|
||||
|
||||
def __reduce__(self):
|
||||
return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))
|
||||
|
||||
def __repr__(self):
|
||||
return 'Token(%r, %r)' % (self.type, self.value)
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
return Token(self.type, self.value, self.start_pos, self.line, self.column)
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, Token) and self.type != other.type:
|
||||
return False
|
||||
|
||||
return str.__eq__(self, other)
|
||||
|
||||
__hash__ = str.__hash__
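
# Illustrative sketch (assumption: not part of the original module): Token subclasses str,
# so ordinary string operations work, while the extra attributes carry position info.
_tok = Token('NUMBER', '42', start_pos=8, line=1, column=9)
assert _tok == '42'                      # compares like a plain string
assert _tok.type == 'NUMBER' and (_tok.line, _tok.column) == (1, 9)
_renamed = _tok.update(type='INT')       # new_borrow_pos() keeps the original positions
assert _renamed.start_pos == _tok.start_pos and _renamed == '42'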
|
||||
|
||||
|
||||
class LineCounter:
|
||||
"A utility class for keeping track of line & column information"
|
||||
|
||||
__slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char'
|
||||
|
||||
def __init__(self, newline_char):
|
||||
self.newline_char = newline_char
|
||||
self.char_pos = 0
|
||||
self.line = 1
|
||||
self.column = 1
|
||||
self.line_start_pos = 0
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, LineCounter):
|
||||
return NotImplemented
|
||||
|
||||
return self.char_pos == other.char_pos and self.newline_char == other.newline_char
|
||||
|
||||
def feed(self, token: Token, test_newline=True):
|
||||
"""Consume a token and calculate the new line & column.
|
||||
|
||||
As an optional optimization, set test_newline=False if token doesn't contain a newline.
|
||||
"""
|
||||
if test_newline:
|
||||
newlines = token.count(self.newline_char)
|
||||
if newlines:
|
||||
self.line += newlines
|
||||
self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
|
||||
|
||||
self.char_pos += len(token)
|
||||
self.column = self.char_pos - self.line_start_pos + 1
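
# Small illustration (not in the original source): feed() advances char_pos per character
# and recomputes line/column after each newline_char.
_lc = LineCounter('\n')
_lc.feed('ab\ncd')
assert (_lc.line, _lc.column, _lc.char_pos) == (2, 3, 5)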
|
||||
|
||||
|
||||
class UnlessCallback:
|
||||
def __init__(self, scanner):
|
||||
self.scanner = scanner
|
||||
|
||||
def __call__(self, t):
|
||||
res = self.scanner.match(t.value, 0)
|
||||
if res:
|
||||
_value, t.type = res
|
||||
return t
|
||||
|
||||
|
||||
class CallChain:
|
||||
def __init__(self, callback1, callback2, cond):
|
||||
self.callback1 = callback1
|
||||
self.callback2 = callback2
|
||||
self.cond = cond
|
||||
|
||||
def __call__(self, t):
|
||||
t2 = self.callback1(t)
|
||||
return self.callback2(t) if self.cond(t2) else t2
|
||||
|
||||
|
||||
def _get_match(re_, regexp, s, flags):
|
||||
m = re_.match(regexp, s, flags)
|
||||
if m:
|
||||
return m.group(0)
|
||||
|
||||
def _create_unless(terminals, g_regex_flags, re_, use_bytes):
|
||||
tokens_by_type = classify(terminals, lambda t: type(t.pattern))
|
||||
assert len(tokens_by_type) <= 2, tokens_by_type.keys()
|
||||
embedded_strs = set()
|
||||
callback = {}
|
||||
for retok in tokens_by_type.get(PatternRE, []):
|
||||
unless = []
|
||||
for strtok in tokens_by_type.get(PatternStr, []):
|
||||
if strtok.priority != retok.priority:
|
||||
continue
|
||||
s = strtok.pattern.value
|
||||
if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags):
|
||||
unless.append(strtok)
|
||||
if strtok.pattern.flags <= retok.pattern.flags:
|
||||
embedded_strs.add(strtok)
|
||||
if unless:
|
||||
callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes))
|
||||
|
||||
new_terminals = [t for t in terminals if t not in embedded_strs]
|
||||
return new_terminals, callback
|
||||
|
||||
|
||||
class Scanner:
|
||||
def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
|
||||
self.terminals = terminals
|
||||
self.g_regex_flags = g_regex_flags
|
||||
self.re_ = re_
|
||||
self.use_bytes = use_bytes
|
||||
self.match_whole = match_whole
|
||||
|
||||
self.allowed_types = {t.name for t in self.terminals}
|
||||
|
||||
self._mres = self._build_mres(terminals, len(terminals))
|
||||
|
||||
def _build_mres(self, terminals, max_size):
|
||||
# Python sets an unreasonable group limit (currently 100) in its re module
|
||||
# Worse, the only way to know we reached it is by catching an AssertionError!
|
||||
# This function recursively tries less and less groups until it's successful.
|
||||
postfix = '$' if self.match_whole else ''
|
||||
mres = []
|
||||
while terminals:
|
||||
pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size])
|
||||
if self.use_bytes:
|
||||
pattern = pattern.encode('latin-1')
|
||||
try:
|
||||
mre = self.re_.compile(pattern, self.g_regex_flags)
|
||||
except AssertionError: # Yes, this is what Python provides us.. :/
|
||||
return self._build_mres(terminals, max_size // 2)
|
||||
|
||||
mres.append(mre)
|
||||
terminals = terminals[max_size:]
|
||||
return mres
|
||||
|
||||
def match(self, text, pos):
|
||||
for mre in self._mres:
|
||||
m = mre.match(text, pos)
|
||||
if m:
|
||||
return m.group(0), m.lastgroup
|
||||
|
||||
|
||||
def _regexp_has_newline(r: str):
|
||||
r"""Expressions that may indicate newlines in a regexp:
|
||||
- newlines (\n)
|
||||
- escaped newline (\\n)
|
||||
- anything but ([^...])
|
||||
- any-char (.) when the flag (?s) exists
|
||||
- spaces (\s)
|
||||
"""
|
||||
return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
|
||||
|
||||
|
||||
class LexerState:
|
||||
"""Represents the current state of the lexer as it scans the text
|
||||
(Lexer objects are only instantiated per grammar, not per text)
|
||||
"""
|
||||
|
||||
__slots__ = 'text', 'line_ctr', 'last_token'
|
||||
|
||||
text: str
|
||||
line_ctr: LineCounter
|
||||
last_token: Optional[Token]
|
||||
|
||||
def __init__(self, text: str, line_ctr: Optional[LineCounter]=None, last_token: Optional[Token]=None):
|
||||
self.text = text
|
||||
self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n')
|
||||
self.last_token = last_token
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, LexerState):
|
||||
return NotImplemented
|
||||
|
||||
return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
|
||||
|
||||
def __copy__(self):
|
||||
return type(self)(self.text, copy(self.line_ctr), self.last_token)
|
||||
|
||||
|
||||
class LexerThread:
|
||||
"""A thread that ties a lexer instance and a lexer state, to be used by the parser
|
||||
"""
|
||||
|
||||
def __init__(self, lexer: 'Lexer', lexer_state: LexerState):
|
||||
self.lexer = lexer
|
||||
self.state = lexer_state
|
||||
|
||||
@classmethod
|
||||
def from_text(cls, lexer: 'Lexer', text: str) -> 'LexerThread':
|
||||
return cls(lexer, LexerState(text))
|
||||
|
||||
def lex(self, parser_state):
|
||||
return self.lexer.lex(self.state, parser_state)
|
||||
|
||||
def __copy__(self):
|
||||
return type(self)(self.lexer, copy(self.state))
|
||||
|
||||
_Token = Token
|
||||
|
||||
|
||||
_Callback = Callable[[Token], Token]
|
||||
|
||||
class Lexer(ABC):
|
||||
"""Lexer interface
|
||||
|
||||
Method Signatures:
|
||||
lex(self, lexer_state, parser_state) -> Iterator[Token]
|
||||
"""
|
||||
@abstractmethod
|
||||
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
|
||||
return NotImplemented
|
||||
|
||||
def make_lexer_state(self, text):
|
||||
"Deprecated"
|
||||
return LexerState(text)
|
||||
|
||||
|
||||
def _check_regex_collisions(terminal_to_regexp: Dict[TerminalDef, str], comparator, strict_mode, max_collisions_to_show=8):
|
||||
if not comparator:
|
||||
comparator = interegular.Comparator.from_regexes(terminal_to_regexp)
|
||||
|
||||
# When in strict mode, we only ever try to provide one example, so taking
|
||||
# a long time for that should be fine
|
||||
max_time = 2 if strict_mode else 0.2
|
||||
|
||||
# We don't want to show too many collisions.
|
||||
if comparator.count_marked_pairs() >= max_collisions_to_show:
|
||||
return
|
||||
for group in classify(terminal_to_regexp, lambda t: t.priority).values():
|
||||
for a, b in comparator.check(group, skip_marked=True):
|
||||
assert a.priority == b.priority
|
||||
# Mark this pair to not repeat warnings when multiple different BasicLexers see the same collision
|
||||
comparator.mark(a, b)
|
||||
|
||||
# Notify the user
|
||||
message = f"Collision between Terminals {a.name} and {b.name}. "
|
||||
try:
|
||||
example = comparator.get_example_overlap(a, b, max_time).format_multiline()
|
||||
except ValueError:
|
||||
# Couldn't find an example within max_time steps.
|
||||
example = "No example could be found fast enough. However, the collision does still exists"
|
||||
if strict_mode:
|
||||
raise LexError(f"{message}\n{example}")
|
||||
logger.warning("%s The lexer will choose between them arbitrarily.\n%s", message, example)
|
||||
if comparator.count_marked_pairs() >= max_collisions_to_show:
|
||||
logger.warning("Found 8 regex collisions, will not check for more.")
|
||||
return
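
# Hedged illustration (grammar below is made up): two terminals whose regexps can match the
# same string, e.g. both A and B accept "0", are what _check_regex_collisions() reports when
# `interegular` is available; with strict=True the overlap raises LexError instead of a warning.
_colliding_grammar = r'''
    start: A | B
    A: /[0-9]+/
    B: /0x?[0-9]*/
'''
# Lark(_colliding_grammar, parser='lalr', strict=True)   # would report/raise on the A/B overlap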
|
||||
|
||||
|
||||
class AbstractBasicLexer(Lexer):
|
||||
terminals_by_name: Dict[str, TerminalDef]
|
||||
|
||||
@abstractmethod
|
||||
def __init__(self, conf: 'LexerConf', comparator=None) -> None:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token:
|
||||
...
|
||||
|
||||
def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
|
||||
with suppress(EOFError):
|
||||
while True:
|
||||
yield self.next_token(state, parser_state)
|
||||
|
||||
|
||||
class BasicLexer(AbstractBasicLexer):
|
||||
terminals: Collection[TerminalDef]
|
||||
ignore_types: FrozenSet[str]
|
||||
newline_types: FrozenSet[str]
|
||||
user_callbacks: Dict[str, _Callback]
|
||||
callback: Dict[str, _Callback]
|
||||
re: ModuleType
|
||||
|
||||
def __init__(self, conf: 'LexerConf', comparator=None) -> None:
|
||||
terminals = list(conf.terminals)
|
||||
assert all(isinstance(t, TerminalDef) for t in terminals), terminals
|
||||
|
||||
self.re = conf.re_module
|
||||
|
||||
if not conf.skip_validation:
|
||||
# Sanitization
|
||||
terminal_to_regexp = {}
|
||||
for t in terminals:
|
||||
regexp = t.pattern.to_regexp()
|
||||
try:
|
||||
self.re.compile(regexp, conf.g_regex_flags)
|
||||
except self.re.error:
|
||||
raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
|
||||
|
||||
if t.pattern.min_width == 0:
|
||||
raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
|
||||
if t.pattern.type == "re":
|
||||
terminal_to_regexp[t] = regexp
|
||||
|
||||
if not (set(conf.ignore) <= {t.name for t in terminals}):
|
||||
raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals}))
|
||||
|
||||
if has_interegular:
|
||||
_check_regex_collisions(terminal_to_regexp, comparator, conf.strict)
|
||||
elif conf.strict:
|
||||
raise LexError("interegular must be installed for strict mode. Use `pip install 'lark[interegular]'`.")
|
||||
|
||||
# Init
|
||||
self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
|
||||
self.ignore_types = frozenset(conf.ignore)
|
||||
|
||||
terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
|
||||
self.terminals = terminals
|
||||
self.user_callbacks = conf.callbacks
|
||||
self.g_regex_flags = conf.g_regex_flags
|
||||
self.use_bytes = conf.use_bytes
|
||||
self.terminals_by_name = conf.terminals_by_name
|
||||
|
||||
self._scanner = None
|
||||
|
||||
def _build_scanner(self):
|
||||
terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
|
||||
assert all(self.callback.values())
|
||||
|
||||
for type_, f in self.user_callbacks.items():
|
||||
if type_ in self.callback:
|
||||
# Already a callback there, probably UnlessCallback
|
||||
self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
|
||||
else:
|
||||
self.callback[type_] = f
|
||||
|
||||
self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes)
|
||||
|
||||
@property
|
||||
def scanner(self):
|
||||
if self._scanner is None:
|
||||
self._build_scanner()
|
||||
return self._scanner
|
||||
|
||||
def match(self, text, pos):
|
||||
return self.scanner.match(text, pos)
|
||||
|
||||
def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token:
|
||||
line_ctr = lex_state.line_ctr
|
||||
while line_ctr.char_pos < len(lex_state.text):
|
||||
res = self.match(lex_state.text, line_ctr.char_pos)
|
||||
if not res:
|
||||
allowed = self.scanner.allowed_types - self.ignore_types
|
||||
if not allowed:
|
||||
allowed = {"<END-OF-FILE>"}
|
||||
raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
|
||||
allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
|
||||
state=parser_state, terminals_by_name=self.terminals_by_name)
|
||||
|
||||
value, type_ = res
|
||||
|
||||
ignored = type_ in self.ignore_types
|
||||
t = None
|
||||
if not ignored or type_ in self.callback:
|
||||
t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
|
||||
line_ctr.feed(value, type_ in self.newline_types)
|
||||
if t is not None:
|
||||
t.end_line = line_ctr.line
|
||||
t.end_column = line_ctr.column
|
||||
t.end_pos = line_ctr.char_pos
|
||||
if t.type in self.callback:
|
||||
t = self.callback[t.type](t)
|
||||
if not ignored:
|
||||
if not isinstance(t, Token):
|
||||
raise LexError("Callbacks must return a token (returned %r)" % t)
|
||||
lex_state.last_token = t
|
||||
return t
|
||||
|
||||
# EOF
|
||||
raise EOFError(self)
|
||||
|
||||
|
||||
class ContextualLexer(Lexer):
|
||||
lexers: Dict[int, AbstractBasicLexer]
|
||||
root_lexer: AbstractBasicLexer
|
||||
|
||||
BasicLexer: Type[AbstractBasicLexer] = BasicLexer
|
||||
|
||||
def __init__(self, conf: 'LexerConf', states: Dict[int, Collection[str]], always_accept: Collection[str]=()) -> None:
|
||||
terminals = list(conf.terminals)
|
||||
terminals_by_name = conf.terminals_by_name
|
||||
|
||||
trad_conf = copy(conf)
|
||||
trad_conf.terminals = terminals
|
||||
|
||||
if has_interegular and not conf.skip_validation:
|
||||
comparator = interegular.Comparator.from_regexes({t: t.pattern.to_regexp() for t in terminals})
|
||||
else:
|
||||
comparator = None
|
||||
lexer_by_tokens: Dict[FrozenSet[str], AbstractBasicLexer] = {}
|
||||
self.lexers = {}
|
||||
for state, accepts in states.items():
|
||||
key = frozenset(accepts)
|
||||
try:
|
||||
lexer = lexer_by_tokens[key]
|
||||
except KeyError:
|
||||
accepts = set(accepts) | set(conf.ignore) | set(always_accept)
|
||||
lexer_conf = copy(trad_conf)
|
||||
lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name]
|
||||
lexer = self.BasicLexer(lexer_conf, comparator)
|
||||
lexer_by_tokens[key] = lexer
|
||||
|
||||
self.lexers[state] = lexer
|
||||
|
||||
assert trad_conf.terminals is terminals
|
||||
trad_conf.skip_validation = True # We don't need to verify all terminals again
|
||||
self.root_lexer = self.BasicLexer(trad_conf, comparator)
|
||||
|
||||
def lex(self, lexer_state: LexerState, parser_state: 'ParserState') -> Iterator[Token]:
|
||||
try:
|
||||
while True:
|
||||
lexer = self.lexers[parser_state.position]
|
||||
yield lexer.next_token(lexer_state, parser_state)
|
||||
except EOFError:
|
||||
pass
|
||||
except UnexpectedCharacters as e:
|
||||
# In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context.
|
||||
# This tests the input against the global context, to provide a nicer error.
|
||||
try:
|
||||
last_token = lexer_state.last_token # Save last_token. Calling root_lexer.next_token will change this to the wrong token
|
||||
token = self.root_lexer.next_token(lexer_state, parser_state)
|
||||
raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name)
|
||||
except UnexpectedCharacters:
|
||||
raise e # Raise the original UnexpectedCharacters. The root lexer raises it with the wrong expected set.
|
||||
|
||||
###}
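
# Hedged usage sketch (illustration only): running just the lexer machinery above through
# Lark.lex(), assuming the vendored package imports as `ccxt.static_dependencies.lark`.
from ccxt.static_dependencies.lark import Lark

_lex_demo = Lark(r'''
    start: WORD+
    WORD: /\w+/
    %ignore " "
''', parser='lalr', lexer='basic')

for _tok in _lex_demo.lex("hello world"):
    print(_tok.type, repr(_tok), _tok.line, _tok.column)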
|
||||
1428
ccxt/static_dependencies/lark/load_grammar.py
Normal file
1428
ccxt/static_dependencies/lark/load_grammar.py
Normal file
File diff suppressed because it is too large
391
ccxt/static_dependencies/lark/parse_tree_builder.py
Normal file
391
ccxt/static_dependencies/lark/parse_tree_builder.py
Normal file
@@ -0,0 +1,391 @@
|
||||
"""Provides functions for the automatic building and shaping of the parse-tree."""
|
||||
|
||||
from typing import List
|
||||
|
||||
from .exceptions import GrammarError, ConfigurationError
|
||||
from .lexer import Token
|
||||
from .tree import Tree
|
||||
from .visitors import Transformer_InPlace
|
||||
from .visitors import _vargs_meta, _vargs_meta_inline
|
||||
|
||||
###{standalone
|
||||
from functools import partial, wraps
|
||||
from itertools import product
|
||||
|
||||
|
||||
class ExpandSingleChild:
|
||||
def __init__(self, node_builder):
|
||||
self.node_builder = node_builder
|
||||
|
||||
def __call__(self, children):
|
||||
if len(children) == 1:
|
||||
return children[0]
|
||||
else:
|
||||
return self.node_builder(children)
|
||||
|
||||
|
||||
|
||||
class PropagatePositions:
|
||||
def __init__(self, node_builder, node_filter=None):
|
||||
self.node_builder = node_builder
|
||||
self.node_filter = node_filter
|
||||
|
||||
def __call__(self, children):
|
||||
res = self.node_builder(children)
|
||||
|
||||
if isinstance(res, Tree):
|
||||
# Calculate positions while the tree is streaming, according to the rule:
|
||||
# - nodes start at the start of their first child's container,
|
||||
# and end at the end of their last child's container.
|
||||
# Containers are nodes that take up space in text, but have been inlined in the tree.
|
||||
|
||||
res_meta = res.meta
|
||||
|
||||
first_meta = self._pp_get_meta(children)
|
||||
if first_meta is not None:
|
||||
if not hasattr(res_meta, 'line'):
|
||||
# meta was already set, probably because the rule has been inlined (e.g. `?rule`)
|
||||
res_meta.line = getattr(first_meta, 'container_line', first_meta.line)
|
||||
res_meta.column = getattr(first_meta, 'container_column', first_meta.column)
|
||||
res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
|
||||
res_meta.empty = False
|
||||
|
||||
res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line)
|
||||
res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column)
|
||||
res_meta.container_start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
|
||||
|
||||
last_meta = self._pp_get_meta(reversed(children))
|
||||
if last_meta is not None:
|
||||
if not hasattr(res_meta, 'end_line'):
|
||||
res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
|
||||
res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
|
||||
res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
|
||||
res_meta.empty = False
|
||||
|
||||
res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
|
||||
res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
|
||||
res_meta.container_end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
|
||||
|
||||
return res
|
||||
|
||||
def _pp_get_meta(self, children):
|
||||
for c in children:
|
||||
if self.node_filter is not None and not self.node_filter(c):
|
||||
continue
|
||||
if isinstance(c, Tree):
|
||||
if not c.meta.empty:
|
||||
return c.meta
|
||||
elif isinstance(c, Token):
|
||||
return c
|
||||
elif hasattr(c, '__lark_meta__'):
|
||||
return c.__lark_meta__()
|
||||
|
||||
def make_propagate_positions(option):
|
||||
if callable(option):
|
||||
return partial(PropagatePositions, node_filter=option)
|
||||
elif option is True:
|
||||
return PropagatePositions
|
||||
elif option is False:
|
||||
return None
|
||||
|
||||
raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
|
||||
|
||||
|
||||
class ChildFilter:
|
||||
def __init__(self, to_include, append_none, node_builder):
|
||||
self.node_builder = node_builder
|
||||
self.to_include = to_include
|
||||
self.append_none = append_none
|
||||
|
||||
def __call__(self, children):
|
||||
filtered = []
|
||||
|
||||
for i, to_expand, add_none in self.to_include:
|
||||
if add_none:
|
||||
filtered += [None] * add_none
|
||||
if to_expand:
|
||||
filtered += children[i].children
|
||||
else:
|
||||
filtered.append(children[i])
|
||||
|
||||
if self.append_none:
|
||||
filtered += [None] * self.append_none
|
||||
|
||||
return self.node_builder(filtered)
|
||||
|
||||
|
||||
class ChildFilterLALR(ChildFilter):
|
||||
"""Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""
|
||||
|
||||
def __call__(self, children):
|
||||
filtered = []
|
||||
for i, to_expand, add_none in self.to_include:
|
||||
if add_none:
|
||||
filtered += [None] * add_none
|
||||
if to_expand:
|
||||
if filtered:
|
||||
filtered += children[i].children
|
||||
else: # Optimize for left-recursion
|
||||
filtered = children[i].children
|
||||
else:
|
||||
filtered.append(children[i])
|
||||
|
||||
if self.append_none:
|
||||
filtered += [None] * self.append_none
|
||||
|
||||
return self.node_builder(filtered)
|
||||
|
||||
|
||||
class ChildFilterLALR_NoPlaceholders(ChildFilter):
|
||||
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
|
||||
def __init__(self, to_include, node_builder):
|
||||
self.node_builder = node_builder
|
||||
self.to_include = to_include
|
||||
|
||||
def __call__(self, children):
|
||||
filtered = []
|
||||
for i, to_expand in self.to_include:
|
||||
if to_expand:
|
||||
if filtered:
|
||||
filtered += children[i].children
|
||||
else: # Optimize for left-recursion
|
||||
filtered = children[i].children
|
||||
else:
|
||||
filtered.append(children[i])
|
||||
return self.node_builder(filtered)
|
||||
|
||||
|
||||
def _should_expand(sym):
|
||||
return not sym.is_term and sym.name.startswith('_')
|
||||
|
||||
|
||||
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]):
|
||||
# Prepare empty_indices as: How many Nones to insert at each index?
|
||||
if _empty_indices:
|
||||
assert _empty_indices.count(False) == len(expansion)
|
||||
s = ''.join(str(int(b)) for b in _empty_indices)
|
||||
empty_indices = [len(ones) for ones in s.split('0')]
|
||||
assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
|
||||
else:
|
||||
empty_indices = [0] * (len(expansion)+1)
|
||||
|
||||
to_include = []
|
||||
nones_to_add = 0
|
||||
for i, sym in enumerate(expansion):
|
||||
nones_to_add += empty_indices[i]
|
||||
if keep_all_tokens or not (sym.is_term and sym.filter_out):
|
||||
to_include.append((i, _should_expand(sym), nones_to_add))
|
||||
nones_to_add = 0
|
||||
|
||||
nones_to_add += empty_indices[len(expansion)]
|
||||
|
||||
if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
|
||||
if _empty_indices or ambiguous:
|
||||
return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
|
||||
else:
|
||||
# LALR without placeholders
|
||||
return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
|
||||
|
||||
|
||||
class AmbiguousExpander:
|
||||
"""Deal with the case where we're expanding children ('_rule') into a parent but the children
|
||||
are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
|
||||
ambiguous with as many copies as there are ambiguous children, and then copy the ambiguous children
|
||||
into the right parents in the right places, essentially shifting the ambiguity up the tree."""
|
||||
def __init__(self, to_expand, tree_class, node_builder):
|
||||
self.node_builder = node_builder
|
||||
self.tree_class = tree_class
|
||||
self.to_expand = to_expand
|
||||
|
||||
def __call__(self, children):
|
||||
def _is_ambig_tree(t):
|
||||
return hasattr(t, 'data') and t.data == '_ambig'
|
||||
|
||||
# -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
|
||||
# All children of an _ambig node should be a derivation of that ambig node, hence
|
||||
# it is safe to assume that if we see an _ambig node nested within an ambig node
|
||||
# it is safe to simply expand it into the parent _ambig node as an alternative derivation.
|
||||
ambiguous = []
|
||||
for i, child in enumerate(children):
|
||||
if _is_ambig_tree(child):
|
||||
if i in self.to_expand:
|
||||
ambiguous.append(i)
|
||||
|
||||
child.expand_kids_by_data('_ambig')
|
||||
|
||||
if not ambiguous:
|
||||
return self.node_builder(children)
|
||||
|
||||
expand = [child.children if i in ambiguous else (child,) for i, child in enumerate(children)]
|
||||
return self.tree_class('_ambig', [self.node_builder(list(f)) for f in product(*expand)])
|
||||
|
||||
|
||||
def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
|
||||
to_expand = [i for i, sym in enumerate(expansion)
|
||||
if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
|
||||
if to_expand:
|
||||
return partial(AmbiguousExpander, to_expand, tree_class)
|
||||
|
||||
|
||||
class AmbiguousIntermediateExpander:
|
||||
"""
|
||||
Propagate ambiguous intermediate nodes and their derivations up to the
|
||||
current rule.
|
||||
|
||||
In general, converts
|
||||
|
||||
rule
|
||||
_iambig
|
||||
_inter
|
||||
someChildren1
|
||||
...
|
||||
_inter
|
||||
someChildren2
|
||||
...
|
||||
someChildren3
|
||||
...
|
||||
|
||||
to
|
||||
|
||||
_ambig
|
||||
rule
|
||||
someChildren1
|
||||
...
|
||||
someChildren3
|
||||
...
|
||||
rule
|
||||
someChildren2
|
||||
...
|
||||
someChildren3
|
||||
...
|
||||
rule
|
||||
childrenFromNestedIambigs
|
||||
...
|
||||
someChildren3
|
||||
...
|
||||
...
|
||||
|
||||
propagating up any nested '_iambig' nodes along the way.
|
||||
"""
|
||||
|
||||
def __init__(self, tree_class, node_builder):
|
||||
self.node_builder = node_builder
|
||||
self.tree_class = tree_class
|
||||
|
||||
def __call__(self, children):
|
||||
def _is_iambig_tree(child):
|
||||
return hasattr(child, 'data') and child.data == '_iambig'
|
||||
|
||||
def _collapse_iambig(children):
|
||||
"""
|
||||
Recursively flatten the derivations of the parent of an '_iambig'
|
||||
node. Returns a list of '_inter' nodes guaranteed not
|
||||
to contain any nested '_iambig' nodes, or None if children does
|
||||
not contain an '_iambig' node.
|
||||
"""
|
||||
|
||||
# Due to the structure of the SPPF,
|
||||
# an '_iambig' node can only appear as the first child
|
||||
if children and _is_iambig_tree(children[0]):
|
||||
iambig_node = children[0]
|
||||
result = []
|
||||
for grandchild in iambig_node.children:
|
||||
collapsed = _collapse_iambig(grandchild.children)
|
||||
if collapsed:
|
||||
for child in collapsed:
|
||||
child.children += children[1:]
|
||||
result += collapsed
|
||||
else:
|
||||
new_tree = self.tree_class('_inter', grandchild.children + children[1:])
|
||||
result.append(new_tree)
|
||||
return result
|
||||
|
||||
collapsed = _collapse_iambig(children)
|
||||
if collapsed:
|
||||
processed_nodes = [self.node_builder(c.children) for c in collapsed]
|
||||
return self.tree_class('_ambig', processed_nodes)
|
||||
|
||||
return self.node_builder(children)
|
||||
|
||||
|
||||
|
||||
def inplace_transformer(func):
|
||||
@wraps(func)
|
||||
def f(children):
|
||||
# function name in a Transformer is a rule name.
|
||||
tree = Tree(func.__name__, children)
|
||||
return func(tree)
|
||||
return f
|
||||
|
||||
|
||||
def apply_visit_wrapper(func, name, wrapper):
|
||||
if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
|
||||
raise NotImplementedError("Meta args not supported for internal transformer")
|
||||
|
||||
@wraps(func)
|
||||
def f(children):
|
||||
return wrapper(func, name, children, None)
|
||||
return f
|
||||
|
||||
|
||||
class ParseTreeBuilder:
|
||||
def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
|
||||
self.tree_class = tree_class
|
||||
self.propagate_positions = propagate_positions
|
||||
self.ambiguous = ambiguous
|
||||
self.maybe_placeholders = maybe_placeholders
|
||||
|
||||
self.rule_builders = list(self._init_builders(rules))
|
||||
|
||||
def _init_builders(self, rules):
|
||||
propagate_positions = make_propagate_positions(self.propagate_positions)
|
||||
|
||||
for rule in rules:
|
||||
options = rule.options
|
||||
keep_all_tokens = options.keep_all_tokens
|
||||
expand_single_child = options.expand1
|
||||
|
||||
wrapper_chain = list(filter(None, [
|
||||
(expand_single_child and not rule.alias) and ExpandSingleChild,
|
||||
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
|
||||
propagate_positions,
|
||||
self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
|
||||
self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
|
||||
]))
|
||||
|
||||
yield rule, wrapper_chain
|
||||
|
||||
def create_callback(self, transformer=None):
|
||||
callbacks = {}
|
||||
|
||||
default_handler = getattr(transformer, '__default__', None)
|
||||
if default_handler:
|
||||
def default_callback(data, children):
|
||||
return default_handler(data, children, None)
|
||||
else:
|
||||
default_callback = self.tree_class
|
||||
|
||||
for rule, wrapper_chain in self.rule_builders:
|
||||
|
||||
user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
|
||||
try:
|
||||
f = getattr(transformer, user_callback_name)
|
||||
wrapper = getattr(f, 'visit_wrapper', None)
|
||||
if wrapper is not None:
|
||||
f = apply_visit_wrapper(f, user_callback_name, wrapper)
|
||||
elif isinstance(transformer, Transformer_InPlace):
|
||||
f = inplace_transformer(f)
|
||||
except AttributeError:
|
||||
f = partial(default_callback, user_callback_name)
|
||||
|
||||
for w in wrapper_chain:
|
||||
f = w(f)
|
||||
|
||||
if rule in callbacks:
|
||||
raise GrammarError("Rule '%s' already exists" % (rule,))
|
||||
|
||||
callbacks[rule] = f
|
||||
|
||||
return callbacks
|
||||
|
||||
###}
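
# Hedged illustration of the tree shaping implemented above (grammar is made up): rule names
# starting with "_" are inlined into their parent (_should_expand), and anonymous filtered
# tokens such as "=" and "," are dropped unless keep_all_tokens=True is passed to Lark.
_shaping_grammar = r'''
    start: pair ("," pair)*
    pair: KEY "=" _value
    _value: NUMBER          // leading underscore -> its children are inlined into `pair`
    KEY: /[a-z]+/
    NUMBER: /\d+/
    %ignore " "
'''
# Lark(_shaping_grammar, parser='lalr').parse("a=1, b=2")
# -> Tree('start', [Tree('pair', [Token('KEY', 'a'), Token('NUMBER', '1')]),
#                   Tree('pair', [Token('KEY', 'b'), Token('NUMBER', '2')])])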
|
||||
257
ccxt/static_dependencies/lark/parser_frontends.py
Normal file
257
ccxt/static_dependencies/lark/parser_frontends.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from typing import Any, Callable, Dict, Optional, Collection, Union, TYPE_CHECKING
|
||||
|
||||
from .exceptions import ConfigurationError, GrammarError, assert_config
|
||||
from .utils import get_regexp_width, Serialize
|
||||
from .lexer import LexerThread, BasicLexer, ContextualLexer, Lexer
|
||||
from .parsers import earley, xearley, cyk
|
||||
from .parsers.lalr_parser import LALR_Parser
|
||||
from .tree import Tree
|
||||
from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .parsers.lalr_analysis import ParseTableBase
|
||||
|
||||
|
||||
###{standalone
|
||||
|
||||
def _wrap_lexer(lexer_class):
|
||||
future_interface = getattr(lexer_class, '__future_interface__', False)
|
||||
if future_interface:
|
||||
return lexer_class
|
||||
else:
|
||||
class CustomLexerWrapper(Lexer):
|
||||
def __init__(self, lexer_conf):
|
||||
self.lexer = lexer_class(lexer_conf)
|
||||
def lex(self, lexer_state, parser_state):
|
||||
return self.lexer.lex(lexer_state.text)
|
||||
return CustomLexerWrapper
|
||||
|
||||
|
||||
def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
|
||||
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
|
||||
cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
|
||||
parser = cls.deserialize(data['parser'], memo, callbacks, options.debug)
|
||||
parser_conf.callbacks = callbacks
|
||||
return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
|
||||
|
||||
|
||||
_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {}
|
||||
|
||||
|
||||
class ParsingFrontend(Serialize):
|
||||
__serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'
|
||||
|
||||
lexer_conf: LexerConf
|
||||
parser_conf: ParserConf
|
||||
options: Any
|
||||
|
||||
def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None):
|
||||
self.parser_conf = parser_conf
|
||||
self.lexer_conf = lexer_conf
|
||||
self.options = options
|
||||
|
||||
# Set-up parser
|
||||
if parser: # From cache
|
||||
self.parser = parser
|
||||
else:
|
||||
create_parser = _parser_creators.get(parser_conf.parser_type)
|
||||
assert create_parser is not None, "{} is not supported in standalone mode".format(
|
||||
parser_conf.parser_type
|
||||
)
|
||||
self.parser = create_parser(lexer_conf, parser_conf, options)
|
||||
|
||||
# Set-up lexer
|
||||
lexer_type = lexer_conf.lexer_type
|
||||
self.skip_lexer = False
|
||||
if lexer_type in ('dynamic', 'dynamic_complete'):
|
||||
assert lexer_conf.postlex is None
|
||||
self.skip_lexer = True
|
||||
return
|
||||
|
||||
if isinstance(lexer_type, type):
|
||||
assert issubclass(lexer_type, Lexer)
|
||||
self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
|
||||
elif isinstance(lexer_type, str):
|
||||
create_lexer = {
|
||||
'basic': create_basic_lexer,
|
||||
'contextual': create_contextual_lexer,
|
||||
}[lexer_type]
|
||||
self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
|
||||
else:
|
||||
raise TypeError("Bad value for lexer_type: {lexer_type}")
|
||||
|
||||
if lexer_conf.postlex:
|
||||
self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
|
||||
|
||||
def _verify_start(self, start=None):
|
||||
if start is None:
|
||||
start_decls = self.parser_conf.start
|
||||
if len(start_decls) > 1:
|
||||
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
|
||||
start, = start_decls
|
||||
elif start not in self.parser_conf.start:
|
||||
raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
|
||||
return start
|
||||
|
||||
def _make_lexer_thread(self, text: str) -> Union[str, LexerThread]:
|
||||
cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
|
||||
return text if self.skip_lexer else cls.from_text(self.lexer, text)
|
||||
|
||||
def parse(self, text: str, start=None, on_error=None):
|
||||
chosen_start = self._verify_start(start)
|
||||
kw = {} if on_error is None else {'on_error': on_error}
|
||||
stream = self._make_lexer_thread(text)
|
||||
return self.parser.parse(stream, chosen_start, **kw)
|
||||
|
||||
def parse_interactive(self, text: Optional[str]=None, start=None):
|
||||
# TODO BREAK - Change text from Optional[str] to text: str = ''.
|
||||
# Would break behavior of exhaust_lexer(), which currently raises TypeError, and after the change would just return []
|
||||
chosen_start = self._verify_start(start)
|
||||
if self.parser_conf.parser_type != 'lalr':
|
||||
raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
|
||||
stream = self._make_lexer_thread(text) # type: ignore[arg-type]
|
||||
return self.parser.parse_interactive(stream, chosen_start)
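For reference, a hedged sketch of interactive parsing through the public API, which routes into this method; it assumes the vendored package is importable as `lark`, and it only works with parser='lalr':

from lark import Lark

p = Lark('start: "a" "b"', parser='lalr')
ip = p.parse_interactive("ab")
print(ip.accepts())         # terminal names the parser could accept next
ip.exhaust_lexer()          # feed the remaining tokens
tree = ip.resume_parse()    # finish and return the parse tree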
|
||||
|
||||
|
||||
def _validate_frontend_args(parser, lexer) -> None:
|
||||
assert_config(parser, ('lalr', 'earley', 'cyk'))
|
||||
if not isinstance(lexer, type): # not custom lexer?
|
||||
expected = {
|
||||
'lalr': ('basic', 'contextual'),
|
||||
'earley': ('basic', 'dynamic', 'dynamic_complete'),
|
||||
'cyk': ('basic', ),
|
||||
}[parser]
|
||||
assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)
|
||||
|
||||
|
||||
def _get_lexer_callbacks(transformer, terminals):
|
||||
result = {}
|
||||
for terminal in terminals:
|
||||
callback = getattr(transformer, terminal.name, None)
|
||||
if callback is not None:
|
||||
result[terminal.name] = callback
|
||||
return result
|
||||
|
||||
class PostLexConnector:
|
||||
def __init__(self, lexer, postlexer):
|
||||
self.lexer = lexer
|
||||
self.postlexer = postlexer
|
||||
|
||||
def lex(self, lexer_state, parser_state):
|
||||
i = self.lexer.lex(lexer_state, parser_state)
|
||||
return self.postlexer.process(i)
|
||||
|
||||
|
||||
|
||||
def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
|
||||
cls = (options and options._plugins.get('BasicLexer')) or BasicLexer
|
||||
return cls(lexer_conf)
|
||||
|
||||
def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer:
|
||||
cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer
|
||||
parse_table: ParseTableBase[int] = parser._parse_table
|
||||
states: Dict[int, Collection[str]] = {idx:list(t.keys()) for idx, t in parse_table.states.items()}
|
||||
always_accept: Collection[str] = postlex.always_accept if postlex else ()
|
||||
return cls(lexer_conf, states, always_accept=always_accept)
|
||||
|
||||
def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser:
|
||||
debug = options.debug if options else False
|
||||
strict = options.strict if options else False
|
||||
cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
|
||||
return cls(parser_conf, debug=debug, strict=strict)
|
||||
|
||||
_parser_creators['lalr'] = create_lalr_parser
|
||||
|
||||
###}
|
||||
|
||||
class EarleyRegexpMatcher:
|
||||
def __init__(self, lexer_conf):
|
||||
self.regexps = {}
|
||||
for t in lexer_conf.terminals:
|
||||
regexp = t.pattern.to_regexp()
|
||||
try:
|
||||
width = get_regexp_width(regexp)[0]
|
||||
except ValueError:
|
||||
raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
|
||||
else:
|
||||
if width == 0:
|
||||
raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
|
||||
if lexer_conf.use_bytes:
|
||||
regexp = regexp.encode('utf-8')
|
||||
|
||||
self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags)
|
||||
|
||||
def match(self, term, text, index=0):
|
||||
return self.regexps[term.name].match(text, index)
|
||||
|
||||
|
||||
def create_earley_parser__dynamic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
|
||||
if lexer_conf.callbacks:
|
||||
raise GrammarError("Earley's dynamic lexer doesn't support lexer_callbacks.")
|
||||
|
||||
earley_matcher = EarleyRegexpMatcher(lexer_conf)
|
||||
return xearley.Parser(lexer_conf, parser_conf, earley_matcher.match, **kw)
|
||||
|
||||
def _match_earley_basic(term, token):
|
||||
return term.name == token.type
|
||||
|
||||
def create_earley_parser__basic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
|
||||
return earley.Parser(lexer_conf, parser_conf, _match_earley_basic, **kw)
|
||||
|
||||
def create_earley_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options) -> earley.Parser:
|
||||
resolve_ambiguity = options.ambiguity == 'resolve'
|
||||
debug = options.debug if options else False
|
||||
tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
|
||||
|
||||
extra = {}
|
||||
if lexer_conf.lexer_type == 'dynamic':
|
||||
f = create_earley_parser__dynamic
|
||||
elif lexer_conf.lexer_type == 'dynamic_complete':
|
||||
extra['complete_lex'] = True
|
||||
f = create_earley_parser__dynamic
|
||||
else:
|
||||
f = create_earley_parser__basic
|
||||
|
||||
return f(lexer_conf, parser_conf, resolve_ambiguity=resolve_ambiguity,
|
||||
debug=debug, tree_class=tree_class, ordered_sets=options.ordered_sets, **extra)
|
||||
|
||||
|
||||
|
||||
class CYK_FrontEnd:
|
||||
def __init__(self, lexer_conf, parser_conf, options=None):
|
||||
self.parser = cyk.Parser(parser_conf.rules)
|
||||
|
||||
self.callbacks = parser_conf.callbacks
|
||||
|
||||
def parse(self, lexer_thread, start):
|
||||
tokens = list(lexer_thread.lex(None))
|
||||
tree = self.parser.parse(tokens, start)
|
||||
return self._transform(tree)
|
||||
|
||||
def _transform(self, tree):
|
||||
subtrees = list(tree.iter_subtrees())
|
||||
for subtree in subtrees:
|
||||
subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]
|
||||
|
||||
return self._apply_callback(tree)
|
||||
|
||||
def _apply_callback(self, tree):
|
||||
return self.callbacks[tree.rule](tree.children)
|
||||
|
||||
|
||||
_parser_creators['earley'] = create_earley_parser
|
||||
_parser_creators['cyk'] = CYK_FrontEnd
|
||||
|
||||
|
||||
def _construct_parsing_frontend(
|
||||
parser_type: _ParserArgType,
|
||||
lexer_type: _LexerArgType,
|
||||
lexer_conf,
|
||||
parser_conf,
|
||||
options
|
||||
):
|
||||
assert isinstance(lexer_conf, LexerConf)
|
||||
assert isinstance(parser_conf, ParserConf)
|
||||
parser_conf.parser_type = parser_type
|
||||
lexer_conf.lexer_type = lexer_type
|
||||
return ParsingFrontend(lexer_conf, parser_conf, options)
|
||||
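To close out this module, a hedged end-to-end sketch of the path the frontend implements (text -> LexerThread -> parser -> Tree), using the public API rather than the private constructors above; it assumes the vendored package is importable as `lark`:

from lark import Lark

parser = Lark(r'''
    start: WORD+
    %import common.WORD
    %ignore " "
''', parser='lalr', lexer='contextual')

print(parser.parse("hello world"))
# Tree('start', [Token('WORD', 'hello'), Token('WORD', 'world')])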
0
ccxt/static_dependencies/lark/parsers/__init__.py
Normal file
340
ccxt/static_dependencies/lark/parsers/cyk.py
Normal file
@@ -0,0 +1,340 @@
|
||||
"""This module implements a CYK parser."""
|
||||
|
||||
# Author: https://github.com/ehudt (2018)
|
||||
#
|
||||
# Adapted by Erez
|
||||
|
||||
|
||||
from collections import defaultdict
|
||||
import itertools
|
||||
|
||||
from ..exceptions import ParseError
|
||||
from ..lexer import Token
|
||||
from ..tree import Tree
|
||||
from ..grammar import Terminal as T, NonTerminal as NT, Symbol
|
||||
|
||||
def match(t, s):
|
||||
assert isinstance(t, T)
|
||||
return t.name == s.type
|
||||
|
||||
|
||||
class Rule:
|
||||
"""Context-free grammar rule."""
|
||||
|
||||
def __init__(self, lhs, rhs, weight, alias):
|
||||
super(Rule, self).__init__()
|
||||
assert isinstance(lhs, NT), lhs
|
||||
assert all(isinstance(x, NT) or isinstance(x, T) for x in rhs), rhs
|
||||
self.lhs = lhs
|
||||
self.rhs = rhs
|
||||
self.weight = weight
|
||||
self.alias = alias
|
||||
|
||||
def __str__(self):
|
||||
return '%s -> %s' % (str(self.lhs), ' '.join(str(x) for x in self.rhs))
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.lhs, tuple(self.rhs)))
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.lhs == other.lhs and self.rhs == other.rhs
|
||||
|
||||
def __ne__(self, other):
|
||||
return not (self == other)
|
||||
|
||||
|
||||
class Grammar:
|
||||
"""Context-free grammar."""
|
||||
|
||||
def __init__(self, rules):
|
||||
self.rules = frozenset(rules)
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.rules == other.rules
|
||||
|
||||
def __str__(self):
|
||||
return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n'
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
|
||||
# Parse tree data structures
|
||||
class RuleNode:
|
||||
"""A node in the parse tree, which also contains the full rhs rule."""
|
||||
|
||||
def __init__(self, rule, children, weight=0):
|
||||
self.rule = rule
|
||||
self.children = children
|
||||
self.weight = weight
|
||||
|
||||
def __repr__(self):
|
||||
return 'RuleNode(%s, [%s])' % (repr(self.rule.lhs), ', '.join(str(x) for x in self.children))
|
||||
|
||||
|
||||
|
||||
class Parser:
|
||||
"""Parser wrapper."""
|
||||
|
||||
def __init__(self, rules):
|
||||
super(Parser, self).__init__()
|
||||
self.orig_rules = {rule: rule for rule in rules}
|
||||
rules = [self._to_rule(rule) for rule in rules]
|
||||
self.grammar = to_cnf(Grammar(rules))
|
||||
|
||||
def _to_rule(self, lark_rule):
|
||||
"""Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
|
||||
assert isinstance(lark_rule.origin, NT)
|
||||
assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
|
||||
return Rule(
|
||||
lark_rule.origin, lark_rule.expansion,
|
||||
weight=lark_rule.options.priority if lark_rule.options.priority else 0,
|
||||
alias=lark_rule)
|
||||
|
||||
def parse(self, tokenized, start): # pylint: disable=invalid-name
|
||||
"""Parses input, which is a list of tokens."""
|
||||
assert start
|
||||
start = NT(start)
|
||||
|
||||
table, trees = _parse(tokenized, self.grammar)
|
||||
# Check if the parse succeeded.
|
||||
if all(r.lhs != start for r in table[(0, len(tokenized) - 1)]):
|
||||
raise ParseError('Parsing failed.')
|
||||
parse = trees[(0, len(tokenized) - 1)][start]
|
||||
return self._to_tree(revert_cnf(parse))
|
||||
|
||||
def _to_tree(self, rule_node):
|
||||
"""Converts a RuleNode parse tree to a lark Tree."""
|
||||
orig_rule = self.orig_rules[rule_node.rule.alias]
|
||||
children = []
|
||||
for child in rule_node.children:
|
||||
if isinstance(child, RuleNode):
|
||||
children.append(self._to_tree(child))
|
||||
else:
|
||||
assert isinstance(child.name, Token)
|
||||
children.append(child.name)
|
||||
t = Tree(orig_rule.origin, children)
|
||||
t.rule = orig_rule
|
||||
return t
|
||||
|
||||
|
||||
def print_parse(node, indent=0):
|
||||
if isinstance(node, RuleNode):
|
||||
print(' ' * (indent * 2) + str(node.rule.lhs))
|
||||
for child in node.children:
|
||||
print_parse(child, indent + 1)
|
||||
else:
|
||||
print(' ' * (indent * 2) + str(node.s))
|
||||
|
||||
|
||||
def _parse(s, g):
|
||||
"""Parses sentence 's' using CNF grammar 'g'."""
|
||||
# The CYK table. Indexed with a 2-tuple: (start pos, end pos)
|
||||
table = defaultdict(set)
|
||||
# Top-level structure is similar to the CYK table. Each cell is a dict from
|
||||
# rule name to the best (lightest) tree for that rule.
|
||||
trees = defaultdict(dict)
|
||||
# Populate base case with existing terminal production rules
|
||||
for i, w in enumerate(s):
|
||||
for terminal, rules in g.terminal_rules.items():
|
||||
if match(terminal, w):
|
||||
for rule in rules:
|
||||
table[(i, i)].add(rule)
|
||||
if (rule.lhs not in trees[(i, i)] or
|
||||
rule.weight < trees[(i, i)][rule.lhs].weight):
|
||||
trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)
|
||||
|
||||
# Iterate over lengths of sub-sentences
|
||||
for l in range(2, len(s) + 1):
|
||||
# Iterate over sub-sentences with the given length
|
||||
for i in range(len(s) - l + 1):
|
||||
# Choose partition of the sub-sentence in [1, l)
|
||||
for p in range(i + 1, i + l):
|
||||
span1 = (i, p - 1)
|
||||
span2 = (p, i + l - 1)
|
||||
for r1, r2 in itertools.product(table[span1], table[span2]):
|
||||
for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
|
||||
table[(i, i + l - 1)].add(rule)
|
||||
r1_tree = trees[span1][r1.lhs]
|
||||
r2_tree = trees[span2][r2.lhs]
|
||||
rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight
|
||||
if (rule.lhs not in trees[(i, i + l - 1)]
|
||||
or rule_total_weight < trees[(i, i + l - 1)][rule.lhs].weight):
|
||||
trees[(i, i + l - 1)][rule.lhs] = RuleNode(rule, [r1_tree, r2_tree], weight=rule_total_weight)
|
||||
return table, trees
|
||||
|
||||
|
||||
# This section implements context-free grammar converter to Chomsky normal form.
|
||||
# It also implements a conversion of parse trees from its CNF to the original
|
||||
# grammar.
|
||||
# Overview:
|
||||
# Applies the following operations in this order:
|
||||
# * TERM: Eliminates non-solitary terminals from all rules
|
||||
# * BIN: Eliminates rules with more than 2 symbols on their right-hand-side.
|
||||
# * UNIT: Eliminates non-terminal unit rules
|
||||
#
|
||||
# The following grammar characteristics aren't featured:
|
||||
# * Start symbol appears on RHS
|
||||
# * Empty rules (epsilon rules)
|
||||
|
||||
|
||||
class CnfWrapper:
|
||||
"""CNF wrapper for grammar.
|
||||
|
||||
Validates that the input grammar is CNF and provides helper data structures.
|
||||
"""
|
||||
|
||||
def __init__(self, grammar):
|
||||
super(CnfWrapper, self).__init__()
|
||||
self.grammar = grammar
|
||||
self.rules = grammar.rules
|
||||
self.terminal_rules = defaultdict(list)
|
||||
self.nonterminal_rules = defaultdict(list)
|
||||
for r in self.rules:
|
||||
# Validate that the grammar is CNF and populate auxiliary data structures.
|
||||
assert isinstance(r.lhs, NT), r
|
||||
if len(r.rhs) not in [1, 2]:
|
||||
raise ParseError("CYK doesn't support empty rules")
|
||||
if len(r.rhs) == 1 and isinstance(r.rhs[0], T):
|
||||
self.terminal_rules[r.rhs[0]].append(r)
|
||||
elif len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs):
|
||||
self.nonterminal_rules[tuple(r.rhs)].append(r)
|
||||
else:
|
||||
assert False, r
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.grammar == other.grammar
|
||||
|
||||
def __repr__(self):
|
||||
return repr(self.grammar)
|
||||
|
||||
|
||||
class UnitSkipRule(Rule):
|
||||
"""A rule that records NTs that were skipped during transformation."""
|
||||
|
||||
def __init__(self, lhs, rhs, skipped_rules, weight, alias):
|
||||
super(UnitSkipRule, self).__init__(lhs, rhs, weight, alias)
|
||||
self.skipped_rules = skipped_rules
|
||||
|
||||
def __eq__(self, other):
|
||||
return isinstance(other, type(self)) and self.skipped_rules == other.skipped_rules
|
||||
|
||||
__hash__ = Rule.__hash__
|
||||
|
||||
|
||||
def build_unit_skiprule(unit_rule, target_rule):
|
||||
skipped_rules = []
|
||||
if isinstance(unit_rule, UnitSkipRule):
|
||||
skipped_rules += unit_rule.skipped_rules
|
||||
skipped_rules.append(target_rule)
|
||||
if isinstance(target_rule, UnitSkipRule):
|
||||
skipped_rules += target_rule.skipped_rules
|
||||
return UnitSkipRule(unit_rule.lhs, target_rule.rhs, skipped_rules,
|
||||
weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias)
|
||||
|
||||
|
||||
def get_any_nt_unit_rule(g):
|
||||
"""Returns a non-terminal unit rule from 'g', or None if there is none."""
|
||||
for rule in g.rules:
|
||||
if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT):
|
||||
return rule
|
||||
return None
|
||||
|
||||
|
||||
def _remove_unit_rule(g, rule):
|
||||
"""Removes 'rule' from 'g' without changing the language produced by 'g'."""
|
||||
new_rules = [x for x in g.rules if x != rule]
|
||||
refs = [x for x in g.rules if x.lhs == rule.rhs[0]]
|
||||
new_rules += [build_unit_skiprule(rule, ref) for ref in refs]
|
||||
return Grammar(new_rules)
|
||||
|
||||
|
||||
def _split(rule):
|
||||
"""Splits a rule whose len(rhs) > 2 into shorter rules."""
|
||||
rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
|
||||
rule_name = '__SP_%s' % (rule_str) + '_%d'
|
||||
yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
|
||||
for i in range(1, len(rule.rhs) - 2):
|
||||
yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
|
||||
yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')
|
||||
|
||||
|
||||
def _term(g):
|
||||
"""Applies the TERM rule on 'g' (see top comment)."""
|
||||
all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
|
||||
t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
|
||||
new_rules = []
|
||||
for rule in g.rules:
|
||||
if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs):
|
||||
new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs]
|
||||
new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
|
||||
new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
|
||||
else:
|
||||
new_rules.append(rule)
|
||||
return Grammar(new_rules)
|
||||
|
||||
|
||||
def _bin(g):
|
||||
"""Applies the BIN rule to 'g' (see top comment)."""
|
||||
new_rules = []
|
||||
for rule in g.rules:
|
||||
if len(rule.rhs) > 2:
|
||||
new_rules += _split(rule)
|
||||
else:
|
||||
new_rules.append(rule)
|
||||
return Grammar(new_rules)
|
||||
|
||||
|
||||
def _unit(g):
|
||||
"""Applies the UNIT rule to 'g' (see top comment)."""
|
||||
nt_unit_rule = get_any_nt_unit_rule(g)
|
||||
while nt_unit_rule:
|
||||
g = _remove_unit_rule(g, nt_unit_rule)
|
||||
nt_unit_rule = get_any_nt_unit_rule(g)
|
||||
return g
|
||||
|
||||
|
||||
def to_cnf(g):
|
||||
"""Creates a CNF grammar from a general context-free grammar 'g'."""
|
||||
g = _unit(_bin(_term(g)))
|
||||
return CnfWrapper(g)
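A hedged sketch of the pipeline on a toy rule, using only classes defined in this module; the rule names are made up for illustration:

toy = Grammar([
    Rule(NT('s'), [NT('a'), NT('b'), NT('c')], weight=0, alias=None),
])
cnf = to_cnf(toy)
# TERM: no change (no terminals mixed into a long right-hand side).
# BIN:  's -> a b c' becomes 's -> a <helper>' and '<helper> -> b c',
#       where <helper> is a generated '__SP_...' non-terminal.
# UNIT: no change (there are no single-non-terminal rules).
for rule in sorted(cnf.rules, key=str):
    print(rule)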
|
||||
|
||||
|
||||
def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias):
|
||||
if not skipped_rules:
|
||||
return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight)
|
||||
else:
|
||||
weight = weight - skipped_rules[0].weight
|
||||
return RuleNode(
|
||||
Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [
|
||||
unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs,
|
||||
skipped_rules[1:], children,
|
||||
skipped_rules[0].weight, skipped_rules[0].alias)
|
||||
], weight=weight)
|
||||
|
||||
|
||||
def revert_cnf(node):
|
||||
"""Reverts a parse tree (RuleNode) to its original non-CNF form (Node)."""
|
||||
if isinstance(node, T):
|
||||
return node
|
||||
# Reverts TERM rule.
|
||||
if node.rule.lhs.name.startswith('__T_'):
|
||||
return node.children[0]
|
||||
else:
|
||||
children = []
|
||||
for child in map(revert_cnf, node.children):
|
||||
# Reverts BIN rule.
|
||||
if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
|
||||
children += child.children
|
||||
else:
|
||||
children.append(child)
|
||||
# Reverts UNIT rule.
|
||||
if isinstance(node.rule, UnitSkipRule):
|
||||
return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs,
|
||||
node.rule.skipped_rules, children,
|
||||
node.rule.weight, node.rule.alias)
|
||||
else:
|
||||
return RuleNode(node.rule, children)
|
||||
314
ccxt/static_dependencies/lark/parsers/earley.py
Normal file
@@ -0,0 +1,314 @@
|
||||
"""This module implements an Earley parser.
|
||||
|
||||
The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
|
||||
https://www.sciencedirect.com/science/article/pii/S1571066108001497
|
||||
|
||||
That is probably the best reference for understanding the algorithm here.
|
||||
|
||||
The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
|
||||
is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, Optional, List, Any
|
||||
from collections import deque
|
||||
|
||||
from ..lexer import Token
|
||||
from ..tree import Tree
|
||||
from ..exceptions import UnexpectedEOF, UnexpectedToken
|
||||
from ..utils import logger, OrderedSet, dedup_list
|
||||
from .grammar_analysis import GrammarAnalyzer
|
||||
from ..grammar import NonTerminal
|
||||
from .earley_common import Item
|
||||
from .earley_forest import ForestSumVisitor, SymbolNode, StableSymbolNode, TokenNode, ForestToParseTree
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..common import LexerConf, ParserConf
|
||||
|
||||
class Parser:
|
||||
lexer_conf: 'LexerConf'
|
||||
parser_conf: 'ParserConf'
|
||||
debug: bool
|
||||
|
||||
def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable,
|
||||
resolve_ambiguity: bool=True, debug: bool=False,
|
||||
tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True):
|
||||
analysis = GrammarAnalyzer(parser_conf)
|
||||
self.lexer_conf = lexer_conf
|
||||
self.parser_conf = parser_conf
|
||||
self.resolve_ambiguity = resolve_ambiguity
|
||||
self.debug = debug
|
||||
self.Tree = tree_class
|
||||
self.Set = OrderedSet if ordered_sets else set
|
||||
self.SymbolNode = StableSymbolNode if ordered_sets else SymbolNode
|
||||
|
||||
self.FIRST = analysis.FIRST
|
||||
self.NULLABLE = analysis.NULLABLE
|
||||
self.callbacks = parser_conf.callbacks
|
||||
# TODO add typing info
|
||||
self.predictions = {} # type: ignore[var-annotated]
|
||||
|
||||
## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
|
||||
# the slow 'isupper' in is_terminal.
|
||||
self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
|
||||
self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }
|
||||
|
||||
self.forest_sum_visitor = None
|
||||
for rule in parser_conf.rules:
|
||||
if rule.origin not in self.predictions:
|
||||
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
|
||||
|
||||
## Detect if any rules/terminals have priorities set. If the user specified priority = None, then
|
||||
# the priorities will be stripped from all rules/terminals before they reach us, allowing us to
|
||||
# skip the extra tree walk. We'll also skip this if the user just didn't specify priorities
|
||||
# on any rules/terminals.
|
||||
if self.forest_sum_visitor is None and rule.options.priority is not None:
|
||||
self.forest_sum_visitor = ForestSumVisitor
|
||||
|
||||
# Check terminals for priorities
|
||||
# Ignore terminal priorities if the basic lexer is used
|
||||
if self.lexer_conf.lexer_type != 'basic' and self.forest_sum_visitor is None:
|
||||
for term in self.lexer_conf.terminals:
|
||||
if term.priority:
|
||||
self.forest_sum_visitor = ForestSumVisitor
|
||||
break
|
||||
|
||||
self.term_matcher = term_matcher
|
||||
|
||||
|
||||
def predict_and_complete(self, i, to_scan, columns, transitives):
|
||||
"""The core Earley Predictor and Completer.
|
||||
|
||||
At each stage of the input, we handle any completed items (things
|
||||
that matched on the last cycle) and use those to predict what should
|
||||
come next in the input stream. The completions and any predicted
|
||||
non-terminals are recursively processed until we reach a set of items expecting terminals,
|
||||
which can be added to the scan list for the next scanner cycle."""
|
||||
# Held Completions (H in E. Scott's paper).
|
||||
node_cache = {}
|
||||
held_completions = {}
|
||||
|
||||
column = columns[i]
|
||||
# R (items) = Ei (column.items)
|
||||
items = deque(column)
|
||||
while items:
|
||||
item = items.pop() # remove an element, A say, from R
|
||||
|
||||
### The Earley completer
|
||||
if item.is_complete: ### (item.s == string)
|
||||
if item.node is None:
|
||||
label = (item.s, item.start, i)
|
||||
item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
|
||||
item.node.add_family(item.s, item.rule, item.start, None, None)
|
||||
|
||||
# create_leo_transitives(item.rule.origin, item.start)
|
||||
|
||||
###R Joop Leo right recursion Completer
|
||||
if item.rule.origin in transitives[item.start]:
|
||||
transitive = transitives[item.start][item.s]
|
||||
if transitive.previous in transitives[transitive.column]:
|
||||
root_transitive = transitives[transitive.column][transitive.previous]
|
||||
else:
|
||||
root_transitive = transitive
|
||||
|
||||
new_item = Item(transitive.rule, transitive.ptr, transitive.start)
|
||||
label = (root_transitive.s, root_transitive.start, i)
|
||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
|
||||
new_item.node.add_path(root_transitive, item.node)
|
||||
if new_item.expect in self.TERMINALS:
|
||||
# Add (B :: aC.B, h, y) to Q
|
||||
to_scan.add(new_item)
|
||||
elif new_item not in column:
|
||||
# Add (B :: aC.B, h, y) to Ei and R
|
||||
column.add(new_item)
|
||||
items.append(new_item)
|
||||
###R Regular Earley completer
|
||||
else:
|
||||
# Empty has 0 length. If we complete an empty symbol in a particular
|
||||
# parse step, we need to be able to use that same empty symbol to complete
|
||||
# any predictions that result, that themselves require empty. Avoids
|
||||
# infinite recursion on empty symbols.
|
||||
# held_completions is 'H' in E.Scott's paper.
|
||||
is_empty_item = item.start == i
|
||||
if is_empty_item:
|
||||
held_completions[item.rule.origin] = item.node
|
||||
|
||||
originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
|
||||
for originator in originators:
|
||||
new_item = originator.advance()
|
||||
label = (new_item.s, originator.start, i)
|
||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
|
||||
new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
|
||||
if new_item.expect in self.TERMINALS:
|
||||
# Add (B :: aC.B, h, y) to Q
|
||||
to_scan.add(new_item)
|
||||
elif new_item not in column:
|
||||
# Add (B :: aC.B, h, y) to Ei and R
|
||||
column.add(new_item)
|
||||
items.append(new_item)
|
||||
|
||||
### The Earley predictor
|
||||
elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
|
||||
new_items = []
|
||||
for rule in self.predictions[item.expect]:
|
||||
new_item = Item(rule, 0, i)
|
||||
new_items.append(new_item)
|
||||
|
||||
# Process any held completions (H).
|
||||
if item.expect in held_completions:
|
||||
new_item = item.advance()
|
||||
label = (new_item.s, item.start, i)
|
||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
|
||||
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
|
||||
new_items.append(new_item)
|
||||
|
||||
for new_item in new_items:
|
||||
if new_item.expect in self.TERMINALS:
|
||||
to_scan.add(new_item)
|
||||
elif new_item not in column:
|
||||
column.add(new_item)
|
||||
items.append(new_item)
|
||||
|
||||
def _parse(self, lexer, columns, to_scan, start_symbol=None):
|
||||
|
||||
def is_quasi_complete(item):
|
||||
if item.is_complete:
|
||||
return True
|
||||
|
||||
quasi = item.advance()
|
||||
while not quasi.is_complete:
|
||||
if quasi.expect not in self.NULLABLE:
|
||||
return False
|
||||
if quasi.rule.origin == start_symbol and quasi.expect == start_symbol:
|
||||
return False
|
||||
quasi = quasi.advance()
|
||||
return True
|
||||
|
||||
# def create_leo_transitives(origin, start):
|
||||
# ... # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
|
||||
|
||||
def scan(i, token, to_scan):
|
||||
"""The core Earley Scanner.
|
||||
|
||||
This is a custom implementation of the scanner that uses the
|
||||
Lark lexer to match tokens. The scan list is built by the
|
||||
Earley predictor, based on the previously completed tokens.
|
||||
This ensures that at each phase of the parse we have a custom
|
||||
lexer context, allowing for more complex ambiguities."""
|
||||
next_to_scan = self.Set()
|
||||
next_set = self.Set()
|
||||
columns.append(next_set)
|
||||
transitives.append({})
|
||||
node_cache = {}
|
||||
|
||||
for item in self.Set(to_scan):
|
||||
if match(item.expect, token):
|
||||
new_item = item.advance()
|
||||
label = (new_item.s, new_item.start, i)
|
||||
# 'terminals' may not contain token.type when using %declare
|
||||
# Additionally, token is not always a Token
|
||||
# For example, it can be a Tree when using TreeMatcher
|
||||
term = terminals.get(token.type) if isinstance(token, Token) else None
|
||||
# Set the priority of the token node to 0 so that the
|
||||
# terminal priorities do not affect the Tree chosen by
|
||||
# ForestSumVisitor after the basic lexer has already
|
||||
# "used up" the terminal priorities
|
||||
token_node = TokenNode(token, term, priority=0)
|
||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
|
||||
new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
|
||||
|
||||
if new_item.expect in self.TERMINALS:
|
||||
# add (B ::= Aai+1.B, h, y) to Q'
|
||||
next_to_scan.add(new_item)
|
||||
else:
|
||||
# add (B ::= Aa+1.B, h, y) to Ei+1
|
||||
next_set.add(new_item)
|
||||
|
||||
if not next_set and not next_to_scan:
|
||||
expect = {i.expect.name for i in to_scan}
|
||||
raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))
|
||||
|
||||
return next_to_scan
|
||||
|
||||
|
||||
# Define parser functions
|
||||
match = self.term_matcher
|
||||
|
||||
terminals = self.lexer_conf.terminals_by_name
|
||||
|
||||
# Cache for nodes & tokens created in a particular parse step.
|
||||
transitives = [{}]
|
||||
|
||||
## The main Earley loop.
|
||||
# Run the Prediction/Completion cycle for any Items in the current Earley set.
|
||||
# Completions will be added to the SPPF tree, and predictions will be recursively
|
||||
# processed down to terminals/empty nodes to be added to the scanner for the next
|
||||
# step.
|
||||
expects = {i.expect for i in to_scan}
|
||||
i = 0
|
||||
for token in lexer.lex(expects):
|
||||
self.predict_and_complete(i, to_scan, columns, transitives)
|
||||
|
||||
to_scan = scan(i, token, to_scan)
|
||||
i += 1
|
||||
|
||||
expects.clear()
|
||||
expects |= {i.expect for i in to_scan}
|
||||
|
||||
self.predict_and_complete(i, to_scan, columns, transitives)
|
||||
|
||||
## Column is now the final column in the parse.
|
||||
assert i == len(columns)-1
|
||||
return to_scan
|
||||
|
||||
def parse(self, lexer, start):
|
||||
assert start, start
|
||||
start_symbol = NonTerminal(start)
|
||||
|
||||
columns = [self.Set()]
|
||||
to_scan = self.Set() # The scan buffer. 'Q' in E.Scott's paper.
|
||||
|
||||
## Predict for the start_symbol.
|
||||
# Add predicted items to the first Earley set (for the predictor) if they
|
||||
# result in a non-terminal, or the scanner if they result in a terminal.
|
||||
for rule in self.predictions[start_symbol]:
|
||||
item = Item(rule, 0, 0)
|
||||
if item.expect in self.TERMINALS:
|
||||
to_scan.add(item)
|
||||
else:
|
||||
columns[0].add(item)
|
||||
|
||||
to_scan = self._parse(lexer, columns, to_scan, start_symbol)
|
||||
|
||||
# If the parse was successful, the start
|
||||
# symbol should have been completed in the last step of the Earley cycle, and will be in
|
||||
# this column. Find the item for the start_symbol, which is the root of the SPPF tree.
|
||||
solutions = dedup_list(n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0)
|
||||
if not solutions:
|
||||
expected_terminals = [t.expect.name for t in to_scan]
|
||||
raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
|
||||
|
||||
if self.debug:
|
||||
from .earley_forest import ForestToPyDotVisitor
|
||||
try:
|
||||
debug_walker = ForestToPyDotVisitor()
|
||||
except ImportError:
|
||||
logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
|
||||
else:
|
||||
for i, s in enumerate(solutions):
|
||||
debug_walker.visit(s, f"sppf{i}.png")
|
||||
|
||||
|
||||
if self.Tree is not None:
|
||||
# Perform our SPPF -> AST conversion
|
||||
transformer = ForestToParseTree(self.Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity)
|
||||
solutions = [transformer.transform(s) for s in solutions]
|
||||
|
||||
if len(solutions) > 1:
|
||||
t: Tree = self.Tree('_ambig', solutions)
|
||||
t.expand_kids_by_data('_ambig') # solutions may themselves be _ambig nodes
|
||||
return t
|
||||
return solutions[0]
|
||||
|
||||
# return the root of the SPPF
|
||||
# TODO return a list of solutions, or join them together somehow
|
||||
return solutions[0]
|
||||
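A hedged sketch of how this ambiguity handling surfaces through the public API; it assumes the vendored package is importable as `lark`:

from lark import Lark

p = Lark(r'''
    start: e
    e: e "+" e
     | NUMBER
    %import common.NUMBER
''', parser='earley', ambiguity='explicit')

print(p.parse("1+2+3").pretty())  # contains an _ambig node: (1+2)+3 vs. 1+(2+3)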
42
ccxt/static_dependencies/lark/parsers/earley_common.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""This module implements useful building blocks for the Earley parser
|
||||
"""
|
||||
|
||||
|
||||
class Item:
|
||||
"An Earley Item, the atom of the algorithm."
|
||||
|
||||
__slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
|
||||
def __init__(self, rule, ptr, start):
|
||||
self.is_complete = len(rule.expansion) == ptr
|
||||
self.rule = rule # rule
|
||||
self.ptr = ptr # ptr
|
||||
self.start = start # j
|
||||
self.node = None # w
|
||||
if self.is_complete:
|
||||
self.s = rule.origin
|
||||
self.expect = None
|
||||
self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
|
||||
else:
|
||||
self.s = (rule, ptr)
|
||||
self.expect = rule.expansion[ptr]
|
||||
self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
|
||||
self._hash = hash((self.s, self.start, self.rule))
|
||||
|
||||
def advance(self):
|
||||
return Item(self.rule, self.ptr + 1, self.start)
|
||||
|
||||
def __eq__(self, other):
|
||||
return self is other or (self.s == other.s and self.start == other.start and self.rule == other.rule)
|
||||
|
||||
def __hash__(self):
|
||||
return self._hash
|
||||
|
||||
def __repr__(self):
|
||||
before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
|
||||
after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
|
||||
symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
|
||||
return '%s (%d)' % (symbol, self.start)
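A hedged sketch of the dot movement an Item models; `_Sym` and `_Rule` are minimal stand-ins for the real lark.grammar objects, which is all `Item` needs here:

class _Sym:  # stand-in for lark.grammar.Symbol
    def __init__(self, name):
        self.name = name

class _Rule:  # stand-in for lark.grammar.Rule
    def __init__(self, origin, expansion):
        self.origin, self.expansion = origin, expansion

rule = _Rule(_Sym('start'), [_Sym('A'), _Sym('B')])
item = Item(rule, 0, 0)                  # start ::= * A B
assert not item.is_complete and item.expect.name == 'A'
item = item.advance()                    # start ::= A * B
item = item.advance()                    # start ::= A B *
assert item.is_complete and item.s is rule.origin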
|
||||
|
||||
|
||||
# class TransitiveItem(Item):
|
||||
# ... # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
|
||||
801
ccxt/static_dependencies/lark/parsers/earley_forest.py
Normal file
@@ -0,0 +1,801 @@
|
||||
""""This module implements an SPPF implementation
|
||||
|
||||
This is used as the primary output mechanism for the Earley parser
|
||||
in order to store complex ambiguities.
|
||||
|
||||
Full reference and more details are here:
|
||||
https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
|
||||
"""
|
||||
|
||||
from typing import Type, AbstractSet
|
||||
from random import randint
|
||||
from collections import deque
|
||||
from operator import attrgetter
|
||||
from importlib import import_module
|
||||
from functools import partial
|
||||
|
||||
from ..parse_tree_builder import AmbiguousIntermediateExpander
|
||||
from ..visitors import Discard
|
||||
from ..utils import logger, OrderedSet
|
||||
from ..tree import Tree
|
||||
|
||||
class ForestNode:
|
||||
pass
|
||||
|
||||
class SymbolNode(ForestNode):
|
||||
"""
|
||||
A Symbol Node represents a symbol (or Intermediate LR0).
|
||||
|
||||
Symbol nodes are keyed by the symbol (s). For intermediate nodes
|
||||
s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
|
||||
nodes, s will be a string representing the non-terminal origin (i.e.
|
||||
the left hand side of the rule).
|
||||
|
||||
The children of a Symbol or Intermediate Node will always be Packed Nodes;
|
||||
with each Packed Node child representing a single derivation of a production.
|
||||
|
||||
Hence a Symbol Node with a single child is unambiguous.
|
||||
|
||||
Parameters:
|
||||
s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
|
||||
start: For dynamic lexers, the index of the start of the substring matched by this symbol (inclusive).
|
||||
end: For dynamic lexers, the index of the end of the substring matched by this symbol (exclusive).
|
||||
|
||||
Properties:
|
||||
is_intermediate: True if this node is an intermediate node.
|
||||
priority: The priority of the node's symbol.
|
||||
"""
|
||||
Set: Type[AbstractSet] = set # Overridden by StableSymbolNode
|
||||
__slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate')
|
||||
def __init__(self, s, start, end):
|
||||
self.s = s
|
||||
self.start = start
|
||||
self.end = end
|
||||
self._children = self.Set()
|
||||
self.paths = self.Set()
|
||||
self.paths_loaded = False
|
||||
|
||||
### We use inf here as it can be safely negated without resorting to conditionals,
|
||||
# unlike None or float('NaN'), and sorts appropriately.
|
||||
self.priority = float('-inf')
|
||||
self.is_intermediate = isinstance(s, tuple)
|
||||
|
||||
def add_family(self, lr0, rule, start, left, right):
|
||||
self._children.add(PackedNode(self, lr0, rule, start, left, right))
|
||||
|
||||
def add_path(self, transitive, node):
|
||||
self.paths.add((transitive, node))
|
||||
|
||||
def load_paths(self):
|
||||
for transitive, node in self.paths:
|
||||
if transitive.next_titem is not None:
|
||||
vn = type(self)(transitive.next_titem.s, transitive.next_titem.start, self.end)
|
||||
vn.add_path(transitive.next_titem, node)
|
||||
self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
|
||||
else:
|
||||
self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
|
||||
self.paths_loaded = True
|
||||
|
||||
@property
|
||||
def is_ambiguous(self):
|
||||
"""Returns True if this node is ambiguous."""
|
||||
return len(self.children) > 1
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
"""Returns a list of this node's children sorted from greatest to
|
||||
least priority."""
|
||||
if not self.paths_loaded:
|
||||
self.load_paths()
|
||||
return sorted(self._children, key=attrgetter('sort_key'))
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._children)
|
||||
|
||||
def __repr__(self):
|
||||
if self.is_intermediate:
|
||||
rule = self.s[0]
|
||||
ptr = self.s[1]
|
||||
before = ( expansion.name for expansion in rule.expansion[:ptr] )
|
||||
after = ( expansion.name for expansion in rule.expansion[ptr:] )
|
||||
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
|
||||
else:
|
||||
symbol = self.s.name
|
||||
return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
|
||||
|
||||
class StableSymbolNode(SymbolNode):
|
||||
"A version of SymbolNode that uses OrderedSet for output stability"
|
||||
Set = OrderedSet
|
||||
|
||||
class PackedNode(ForestNode):
|
||||
"""
|
||||
A Packed Node represents a single derivation in a symbol node.
|
||||
|
||||
Parameters:
|
||||
rule: The rule associated with this node.
|
||||
parent: The parent of this node.
|
||||
left: The left child of this node. ``None`` if one does not exist.
|
||||
right: The right child of this node. ``None`` if one does not exist.
|
||||
priority: The priority of this node.
|
||||
"""
|
||||
__slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
|
||||
def __init__(self, parent, s, rule, start, left, right):
|
||||
self.parent = parent
|
||||
self.s = s
|
||||
self.start = start
|
||||
self.rule = rule
|
||||
self.left = left
|
||||
self.right = right
|
||||
self.priority = float('-inf')
|
||||
self._hash = hash((self.left, self.right))
|
||||
|
||||
@property
|
||||
def is_empty(self):
|
||||
return self.left is None and self.right is None
|
||||
|
||||
@property
|
||||
def sort_key(self):
|
||||
"""
|
||||
Used to sort PackedNode children of SymbolNodes.
|
||||
A SymbolNode has multiple PackedNodes if it matched
|
||||
ambiguously. Hence, we use the sort order to identify
|
||||
the order in which ambiguous children should be considered.
|
||||
"""
|
||||
return self.is_empty, -self.priority, self.rule.order
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
"""Returns a list of this node's children."""
|
||||
return [x for x in [self.left, self.right] if x is not None]
|
||||
|
||||
def __iter__(self):
|
||||
yield self.left
|
||||
yield self.right
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, PackedNode):
|
||||
return False
|
||||
return self is other or (self.left == other.left and self.right == other.right)
|
||||
|
||||
def __hash__(self):
|
||||
return self._hash
|
||||
|
||||
def __repr__(self):
|
||||
if isinstance(self.s, tuple):
|
||||
rule = self.s[0]
|
||||
ptr = self.s[1]
|
||||
before = ( expansion.name for expansion in rule.expansion[:ptr] )
|
||||
after = ( expansion.name for expansion in rule.expansion[ptr:] )
|
||||
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
|
||||
else:
|
||||
symbol = self.s.name
|
||||
return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
|
||||
|
||||
class TokenNode(ForestNode):
|
||||
"""
|
||||
A Token Node represents a matched terminal and is always a leaf node.
|
||||
|
||||
Parameters:
|
||||
token: The Token associated with this node.
|
||||
term: The TerminalDef matched by the token.
|
||||
priority: The priority of this node.
|
||||
"""
|
||||
__slots__ = ('token', 'term', 'priority', '_hash')
|
||||
def __init__(self, token, term, priority=None):
|
||||
self.token = token
|
||||
self.term = term
|
||||
if priority is not None:
|
||||
self.priority = priority
|
||||
else:
|
||||
self.priority = term.priority if term is not None else 0
|
||||
self._hash = hash(token)
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, TokenNode):
|
||||
return False
|
||||
return self is other or (self.token == other.token)
|
||||
|
||||
def __hash__(self):
|
||||
return self._hash
|
||||
|
||||
def __repr__(self):
|
||||
return repr(self.token)
|
||||
|
||||
class ForestVisitor:
|
||||
"""
|
||||
An abstract base class for building forest visitors.
|
||||
|
||||
This class performs a controllable depth-first walk of an SPPF.
|
||||
The visitor will not enter cycles and will backtrack if one is encountered.
|
||||
Subclasses are notified of cycles through the ``on_cycle`` method.
|
||||
|
||||
Behavior for visit events is defined by overriding the
|
||||
``visit*node*`` functions.
|
||||
|
||||
The walk is controlled by the return values of the ``visit*node_in``
|
||||
methods. Returning a node(s) will schedule them to be visited. The visitor
|
||||
will begin to backtrack if no nodes are returned.
|
||||
|
||||
Parameters:
|
||||
single_visit: If ``True``, non-Token nodes will only be visited once.
|
||||
"""
|
||||
|
||||
def __init__(self, single_visit=False):
|
||||
self.single_visit = single_visit
|
||||
|
||||
def visit_token_node(self, node):
|
||||
"""Called when a ``Token`` is visited. ``Token`` nodes are always leaves."""
|
||||
pass
|
||||
|
||||
def visit_symbol_node_in(self, node):
|
||||
"""Called when a symbol node is visited. Nodes that are returned
|
||||
will be scheduled to be visited. If ``visit_intermediate_node_in``
|
||||
is not implemented, this function will be called for intermediate
|
||||
nodes as well."""
|
||||
pass
|
||||
|
||||
def visit_symbol_node_out(self, node):
|
||||
"""Called after all nodes returned from a corresponding ``visit_symbol_node_in``
|
||||
call have been visited. If ``visit_intermediate_node_out``
|
||||
is not implemented, this function will be called for intermediate
|
||||
nodes as well."""
|
||||
pass
|
||||
|
||||
def visit_packed_node_in(self, node):
|
||||
"""Called when a packed node is visited. Nodes that are returned
|
||||
will be scheduled to be visited. """
|
||||
pass
|
||||
|
||||
def visit_packed_node_out(self, node):
|
||||
"""Called after all nodes returned from a corresponding ``visit_packed_node_in``
|
||||
call have been visited."""
|
||||
pass
|
||||
|
||||
def on_cycle(self, node, path):
|
||||
"""Called when a cycle is encountered.
|
||||
|
||||
Parameters:
|
||||
node: The node that causes a cycle.
|
||||
path: The list of nodes being visited: nodes that have been
|
||||
entered but not exited. The first element is the root in a forest
|
||||
visit, and the last element is the node visited most recently.
|
||||
``path`` should be treated as read-only.
|
||||
"""
|
||||
pass
|
||||
|
||||
def get_cycle_in_path(self, node, path):
|
||||
"""A utility function for use in ``on_cycle`` to obtain a slice of
|
||||
``path`` that only contains the nodes that make up the cycle."""
|
||||
index = len(path) - 1
|
||||
while id(path[index]) != id(node):
|
||||
index -= 1
|
||||
return path[index:]
|
||||
|
||||
def visit(self, root):
|
||||
# Visiting is a list of IDs of all symbol/intermediate nodes currently in
|
||||
# the stack. It serves two purposes: to detect when we 'recurse' in and out
|
||||
# of a symbol/intermediate so that we can process both up and down. Also,
|
||||
# since the SPPF can have cycles it allows us to detect if we're trying
|
||||
# to recurse into a node that's already on the stack (infinite recursion).
|
||||
visiting = set()
|
||||
|
||||
# set of all nodes that have been visited
|
||||
visited = set()
|
||||
|
||||
# a list of nodes that are currently being visited
|
||||
# used for the `on_cycle` callback
|
||||
path = []
|
||||
|
||||
# We do not use recursion here to walk the Forest due to the limited
|
||||
# stack size in python. Therefore input_stack is essentially our stack.
|
||||
input_stack = deque([root])
|
||||
|
||||
# It is much faster to cache these as locals since they are called
|
||||
# many times in large parses.
|
||||
vpno = getattr(self, 'visit_packed_node_out')
|
||||
vpni = getattr(self, 'visit_packed_node_in')
|
||||
vsno = getattr(self, 'visit_symbol_node_out')
|
||||
vsni = getattr(self, 'visit_symbol_node_in')
|
||||
vino = getattr(self, 'visit_intermediate_node_out', vsno)
|
||||
vini = getattr(self, 'visit_intermediate_node_in', vsni)
|
||||
vtn = getattr(self, 'visit_token_node')
|
||||
oc = getattr(self, 'on_cycle')
|
||||
|
||||
while input_stack:
|
||||
current = next(reversed(input_stack))
|
||||
try:
|
||||
next_node = next(current)
|
||||
except StopIteration:
|
||||
input_stack.pop()
|
||||
continue
|
||||
except TypeError:
|
||||
### If the current object is not an iterator, pass through to Token/SymbolNode
|
||||
pass
|
||||
else:
|
||||
if next_node is None:
|
||||
continue
|
||||
|
||||
if id(next_node) in visiting:
|
||||
oc(next_node, path)
|
||||
continue
|
||||
|
||||
input_stack.append(next_node)
|
||||
continue
|
||||
|
||||
if isinstance(current, TokenNode):
|
||||
vtn(current.token)
|
||||
input_stack.pop()
|
||||
continue
|
||||
|
||||
current_id = id(current)
|
||||
if current_id in visiting:
|
||||
if isinstance(current, PackedNode):
|
||||
vpno(current)
|
||||
elif current.is_intermediate:
|
||||
vino(current)
|
||||
else:
|
||||
vsno(current)
|
||||
input_stack.pop()
|
||||
path.pop()
|
||||
visiting.remove(current_id)
|
||||
visited.add(current_id)
|
||||
elif self.single_visit and current_id in visited:
|
||||
input_stack.pop()
|
||||
else:
|
||||
visiting.add(current_id)
|
||||
path.append(current)
|
||||
if isinstance(current, PackedNode):
|
||||
next_node = vpni(current)
|
||||
elif current.is_intermediate:
|
||||
next_node = vini(current)
|
||||
else:
|
||||
next_node = vsni(current)
|
||||
if next_node is None:
|
||||
continue
|
||||
|
||||
if not isinstance(next_node, ForestNode):
|
||||
next_node = iter(next_node)
|
||||
elif id(next_node) in visiting:
|
||||
oc(next_node, path)
|
||||
continue
|
||||
|
||||
input_stack.append(next_node)
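A hedged sketch of the protocol described above: returning children from the `*_in` methods schedules them for visiting, so this hypothetical visitor walks the whole forest and counts ambiguous symbol (and intermediate) nodes:

class AmbiguityCounter(ForestVisitor):
    def __init__(self):
        super().__init__(single_visit=True)
        self.count = 0

    def visit_symbol_node_in(self, node):
        if node.is_ambiguous:
            self.count += 1
        return node.children       # schedule the packed-node children

    def visit_packed_node_in(self, node):
        return node.children       # keep descending into the forest

# Usage (sketch): counter = AmbiguityCounter(); counter.visit(sppf_root); counter.count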
|
||||
|
||||
class ForestTransformer(ForestVisitor):
|
||||
"""The base class for a bottom-up forest transformation. Most users will
|
||||
want to use ``TreeForestTransformer`` instead as it has a friendlier
|
||||
interface and covers most use cases.
|
||||
|
||||
Transformations are applied via inheritance and overriding of the
|
||||
``transform*node`` methods.
|
||||
|
||||
``transform_token_node`` receives a ``Token`` as an argument.
|
||||
All other methods receive the node that is being transformed and
|
||||
a list of the results of the transformations of that node's children.
|
||||
The return value of these methods are the resulting transformations.
|
||||
|
||||
If ``Discard`` is raised in a node's transformation, no data from that node
|
||||
will be passed to its parent's transformation.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(ForestTransformer, self).__init__()
|
||||
# results of transformations
|
||||
self.data = dict()
|
||||
# used to track parent nodes
|
||||
self.node_stack = deque()
|
||||
|
||||
def transform(self, root):
|
||||
"""Perform a transformation on an SPPF."""
|
||||
self.node_stack.append('result')
|
||||
self.data['result'] = []
|
||||
self.visit(root)
|
||||
assert len(self.data['result']) <= 1
|
||||
if self.data['result']:
|
||||
return self.data['result'][0]
|
||||
|
||||
def transform_symbol_node(self, node, data):
|
||||
"""Transform a symbol node."""
|
||||
return node
|
||||
|
||||
def transform_intermediate_node(self, node, data):
|
||||
"""Transform an intermediate node."""
|
||||
return node
|
||||
|
||||
def transform_packed_node(self, node, data):
|
||||
"""Transform a packed node."""
|
||||
return node
|
||||
|
||||
def transform_token_node(self, node):
|
||||
"""Transform a ``Token``."""
|
||||
return node
|
||||
|
||||
def visit_symbol_node_in(self, node):
|
||||
self.node_stack.append(id(node))
|
||||
self.data[id(node)] = []
|
||||
return node.children
|
||||
|
||||
def visit_packed_node_in(self, node):
|
||||
self.node_stack.append(id(node))
|
||||
self.data[id(node)] = []
|
||||
return node.children
|
||||
|
||||
def visit_token_node(self, node):
|
||||
transformed = self.transform_token_node(node)
|
||||
if transformed is not Discard:
|
||||
self.data[self.node_stack[-1]].append(transformed)
|
||||
|
||||
def _visit_node_out_helper(self, node, method):
|
||||
self.node_stack.pop()
|
||||
transformed = method(node, self.data[id(node)])
|
||||
if transformed is not Discard:
|
||||
self.data[self.node_stack[-1]].append(transformed)
|
||||
del self.data[id(node)]
|
||||
|
||||
def visit_symbol_node_out(self, node):
|
||||
self._visit_node_out_helper(node, self.transform_symbol_node)
|
||||
|
||||
def visit_intermediate_node_out(self, node):
|
||||
self._visit_node_out_helper(node, self.transform_intermediate_node)
|
||||
|
||||
def visit_packed_node_out(self, node):
|
||||
self._visit_node_out_helper(node, self.transform_packed_node)
|
||||
|
||||
|
||||
class ForestSumVisitor(ForestVisitor):
|
||||
"""
|
||||
A visitor for prioritizing ambiguous parts of the Forest.
|
||||
|
||||
This visitor is used when support for explicit priorities on
|
||||
rules is requested (whether normal, or invert). It walks the
|
||||
forest (or subsets thereof) and cascades properties upwards
|
||||
from the leaves.
|
||||
|
||||
It would be ideal to do this during parsing, however this would
|
||||
require processing each Earley item multiple times. That's
|
||||
a big performance drawback; so running a forest walk is the
|
||||
lesser of two evils: there can be significantly more Earley
|
||||
items created during parsing than there are SPPF nodes in the
|
||||
final tree.
|
||||
"""
|
||||
def __init__(self):
|
||||
super(ForestSumVisitor, self).__init__(single_visit=True)
|
||||
|
||||
def visit_packed_node_in(self, node):
|
||||
yield node.left
|
||||
yield node.right
|
||||
|
||||
def visit_symbol_node_in(self, node):
|
||||
return iter(node.children)
|
||||
|
||||
def visit_packed_node_out(self, node):
|
||||
priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0
|
||||
priority += getattr(node.right, 'priority', 0)
|
||||
priority += getattr(node.left, 'priority', 0)
|
||||
node.priority = priority
|
||||
|
||||
def visit_symbol_node_out(self, node):
|
||||
node.priority = max(child.priority for child in node.children)
|
||||
|
||||
class PackedData():
|
||||
"""Used in transformationss of packed nodes to distinguish the data
|
||||
that comes from the left child and the right child.
|
||||
"""
|
||||
|
||||
class _NoData():
|
||||
pass
|
||||
|
||||
NO_DATA = _NoData()
|
||||
|
||||
def __init__(self, node, data):
|
||||
self.left = self.NO_DATA
|
||||
self.right = self.NO_DATA
|
||||
if data:
|
||||
if node.left is not None:
|
||||
self.left = data[0]
|
||||
if len(data) > 1:
|
||||
self.right = data[1]
|
||||
else:
|
||||
self.right = data[0]
|
||||
|
||||
class ForestToParseTree(ForestTransformer):
|
||||
"""Used by the earley parser when ambiguity equals 'resolve' or
|
||||
'explicit'. Transforms an SPPF into an (ambiguous) parse tree.
|
||||
|
||||
Parameters:
|
||||
tree_class: The tree class to use for construction
|
||||
callbacks: A dictionary of rules to functions that output a tree
|
||||
prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes
|
||||
resolve_ambiguity: If True, ambiguities will be resolved based on
|
||||
priorities. Otherwise, `_ambig` nodes will be in the resulting tree.
|
||||
use_cache: If True, the results of packed node transformations will be cached.
|
||||
"""
|
||||
|
||||
def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
|
||||
super(ForestToParseTree, self).__init__()
|
||||
self.tree_class = tree_class
|
||||
self.callbacks = callbacks
|
||||
self.prioritizer = prioritizer
|
||||
self.resolve_ambiguity = resolve_ambiguity
|
||||
self._use_cache = use_cache
|
||||
self._cache = {}
|
||||
self._on_cycle_retreat = False
|
||||
self._cycle_node = None
|
||||
self._successful_visits = set()
|
||||
|
||||
def visit(self, root):
|
||||
if self.prioritizer:
|
||||
self.prioritizer.visit(root)
|
||||
super(ForestToParseTree, self).visit(root)
|
||||
self._cache = {}
|
||||
|
||||
def on_cycle(self, node, path):
|
||||
logger.debug("Cycle encountered in the SPPF at node: %s. "
|
||||
"As infinite ambiguities cannot be represented in a tree, "
|
||||
"this family of derivations will be discarded.", node)
|
||||
self._cycle_node = node
|
||||
self._on_cycle_retreat = True
|
||||
|
||||
def _check_cycle(self, node):
|
||||
if self._on_cycle_retreat:
|
||||
if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
|
||||
self._cycle_node = None
|
||||
self._on_cycle_retreat = False
|
||||
else:
|
||||
return Discard
|
||||
|
||||
def _collapse_ambig(self, children):
|
||||
new_children = []
|
||||
for child in children:
|
||||
if hasattr(child, 'data') and child.data == '_ambig':
|
||||
new_children += child.children
|
||||
else:
|
||||
new_children.append(child)
|
||||
return new_children
|
||||
|
||||
def _call_rule_func(self, node, data):
|
||||
# called when transforming children of symbol nodes
|
||||
# data is a list of trees or tokens that correspond to the
|
||||
# symbol's rule expansion
|
||||
return self.callbacks[node.rule](data)
|
||||
|
||||
def _call_ambig_func(self, node, data):
|
||||
# called when transforming a symbol node
|
||||
# data is a list of trees where each tree's data is
|
||||
# equal to the name of the symbol or one of its aliases.
|
||||
if len(data) > 1:
|
||||
return self.tree_class('_ambig', data)
|
||||
elif data:
|
||||
return data[0]
|
||||
return Discard
|
||||
|
||||
def transform_symbol_node(self, node, data):
|
||||
if id(node) not in self._successful_visits:
|
||||
return Discard
|
||||
r = self._check_cycle(node)
|
||||
if r is Discard:
|
||||
return r
|
||||
self._successful_visits.remove(id(node))
|
||||
data = self._collapse_ambig(data)
|
||||
return self._call_ambig_func(node, data)
|
||||
|
||||
def transform_intermediate_node(self, node, data):
|
||||
if id(node) not in self._successful_visits:
|
||||
return Discard
|
||||
r = self._check_cycle(node)
|
||||
if r is Discard:
|
||||
return r
|
||||
self._successful_visits.remove(id(node))
|
||||
if len(data) > 1:
|
||||
children = [self.tree_class('_inter', c) for c in data]
|
||||
return self.tree_class('_iambig', children)
|
||||
return data[0]
|
||||
|
||||
def transform_packed_node(self, node, data):
|
||||
r = self._check_cycle(node)
|
||||
if r is Discard:
|
||||
return r
|
||||
if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
|
||||
return Discard
|
||||
if self._use_cache and id(node) in self._cache:
|
||||
return self._cache[id(node)]
|
||||
children = []
|
||||
assert len(data) <= 2
|
||||
data = PackedData(node, data)
|
||||
if data.left is not PackedData.NO_DATA:
|
||||
if node.left.is_intermediate and isinstance(data.left, list):
|
||||
children += data.left
|
||||
else:
|
||||
children.append(data.left)
|
||||
if data.right is not PackedData.NO_DATA:
|
||||
children.append(data.right)
|
||||
if node.parent.is_intermediate:
|
||||
return self._cache.setdefault(id(node), children)
|
||||
return self._cache.setdefault(id(node), self._call_rule_func(node, children))
|
||||
|
||||
def visit_symbol_node_in(self, node):
|
||||
super(ForestToParseTree, self).visit_symbol_node_in(node)
|
||||
if self._on_cycle_retreat:
|
||||
return
|
||||
return node.children
|
||||
|
||||
def visit_packed_node_in(self, node):
|
||||
self._on_cycle_retreat = False
|
||||
to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
|
||||
if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
|
||||
if not self._use_cache or id(node) not in self._cache:
|
||||
return to_visit
|
||||
|
||||
def visit_packed_node_out(self, node):
|
||||
super(ForestToParseTree, self).visit_packed_node_out(node)
|
||||
if not self._on_cycle_retreat:
|
||||
self._successful_visits.add(id(node.parent))
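# Illustrative sketch (not part of the original file): the user-visible effect of the
# transformer above. With ambiguity='resolve' (the default) priorities select a single
# derivation; with ambiguity='explicit' the surviving derivations are kept as '_ambig'
# nodes in the returned tree. Assumes the standard `lark` package layout.
from lark import Lark

grammar = """
start: e
e: e "+" e | NUMBER
%import common.NUMBER
"""

resolved = Lark(grammar, parser='earley', ambiguity='resolve').parse("1+2+3")
explicit = Lark(grammar, parser='earley', ambiguity='explicit').parse("1+2+3")

print(resolved.pretty())  # a single parse tree
# The explicit tree should contain an '_ambig' node for the two groupings of "1+2+3".
print(any(t.data == '_ambig' for t in explicit.iter_subtrees()))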
|
||||
|
||||
def handles_ambiguity(func):
|
||||
"""Decorator for methods of subclasses of ``TreeForestTransformer``.
|
||||
Denotes that the method should receive a list of transformed derivations."""
|
||||
func.handles_ambiguity = True
|
||||
return func
|
||||
|
||||
class TreeForestTransformer(ForestToParseTree):
|
||||
"""A ``ForestTransformer`` with a tree ``Transformer``-like interface.
|
||||
By default, it will construct a tree.
|
||||
|
||||
Methods provided via inheritance are called based on the rule/symbol
|
||||
names of nodes in the forest.
|
||||
|
||||
Methods that act on rules will receive a list of the results of the
|
||||
transformations of the rule's children. By default, trees and tokens.
|
||||
|
||||
Methods that act on tokens will receive a token.
|
||||
|
||||
Alternatively, methods that act on rules may be annotated with
|
||||
``handles_ambiguity``. In this case, the function will receive a list
|
||||
of all the transformations of all the derivations of the rule.
|
||||
By default, a list of trees where each tree.data is equal to the
|
||||
rule name or one of its aliases.
|
||||
|
||||
Non-tree transformations are made possible by overriding
``__default__``, ``__default_token__``, and ``__default_ambig__``.
|
||||
|
||||
Note:
|
||||
Tree shaping features such as inlined rules and token filtering are
|
||||
not built into the transformation. Positions are also not propagated.
|
||||
|
||||
Parameters:
|
||||
tree_class: The tree class to use for construction
|
||||
prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF.
|
||||
resolve_ambiguity: If True, ambiguities will be resolved based on priorities.
|
||||
use_cache (bool): If True, caches the results of some transformations,
|
||||
potentially improving performance when ``resolve_ambiguity==False``.
|
||||
Only use if you know what you are doing, i.e. all transformation
functions are pure and referentially transparent.
|
||||
"""
|
||||
|
||||
def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
|
||||
super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache)
|
||||
|
||||
def __default__(self, name, data):
|
||||
"""Default operation on tree (for override).
|
||||
|
||||
Returns a tree with name with data as children.
|
||||
"""
|
||||
return self.tree_class(name, data)
|
||||
|
||||
def __default_ambig__(self, name, data):
|
||||
"""Default operation on ambiguous rule (for override).
|
||||
|
||||
Wraps data in an '_ambig' node if it contains more than
one element.
|
||||
"""
|
||||
if len(data) > 1:
|
||||
return self.tree_class('_ambig', data)
|
||||
elif data:
|
||||
return data[0]
|
||||
return Discard
|
||||
|
||||
def __default_token__(self, node):
|
||||
"""Default operation on ``Token`` (for override).
|
||||
|
||||
Returns ``node``.
|
||||
"""
|
||||
return node
|
||||
|
||||
def transform_token_node(self, node):
|
||||
return getattr(self, node.type, self.__default_token__)(node)
|
||||
|
||||
def _call_rule_func(self, node, data):
|
||||
name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name
|
||||
user_func = getattr(self, name, self.__default__)
|
||||
if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'):
|
||||
user_func = partial(self.__default__, name)
|
||||
if not self.resolve_ambiguity:
|
||||
wrapper = partial(AmbiguousIntermediateExpander, self.tree_class)
|
||||
user_func = wrapper(user_func)
|
||||
return user_func(data)
|
||||
|
||||
def _call_ambig_func(self, node, data):
|
||||
name = node.s.name
|
||||
user_func = getattr(self, name, self.__default_ambig__)
|
||||
if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'):
|
||||
user_func = partial(self.__default_ambig__, name)
|
||||
return user_func(data)
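# Illustrative sketch (not part of the original file): using the Transformer-like
# interface described in the class docstring on a forest obtained with
# ambiguity='forest'. Import paths assume the standard `lark` package layout.
from lark import Lark
from lark.parsers.earley_forest import TreeForestTransformer, handles_ambiguity

forest = Lark('start: e\ne: e "+" e | NUMBER\n%import common.NUMBER',
              parser='earley', ambiguity='forest').parse("1+2+3")

class AmbigCounter(TreeForestTransformer):
    # Receives *all* derivations of symbol 'e'; unambiguous spans get a 1-element list.
    @handles_ambiguity
    def e(self, derivations):
        if len(derivations) > 1:
            print("ambiguous 'e' with", len(derivations), "derivations")
        return derivations[0]

tree = AmbigCounter(resolve_ambiguity=False).transform(forest)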
|
||||
|
||||
class ForestToPyDotVisitor(ForestVisitor):
|
||||
"""
|
||||
A Forest visitor which writes the SPPF to a PNG.
|
||||
|
||||
The SPPF can get really large, really quickly because
|
||||
of the amount of meta-data it stores, so this is probably
|
||||
only useful for trivial trees and learning how the SPPF
|
||||
is structured.
|
||||
"""
|
||||
def __init__(self, rankdir="TB"):
|
||||
super(ForestToPyDotVisitor, self).__init__(single_visit=True)
|
||||
self.pydot = import_module('pydot')
|
||||
self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
|
||||
|
||||
def visit(self, root, filename):
|
||||
super(ForestToPyDotVisitor, self).visit(root)
|
||||
try:
|
||||
self.graph.write_png(filename)
|
||||
except FileNotFoundError as e:
|
||||
logger.error("Could not write png: ", e)
|
||||
|
||||
def visit_token_node(self, node):
|
||||
graph_node_id = str(id(node))
|
||||
graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
|
||||
graph_node_color = 0x808080
|
||||
graph_node_style = "\"filled,rounded\""
|
||||
graph_node_shape = "diamond"
|
||||
graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
|
||||
self.graph.add_node(graph_node)
|
||||
|
||||
def visit_packed_node_in(self, node):
|
||||
graph_node_id = str(id(node))
|
||||
graph_node_label = repr(node)
|
||||
graph_node_color = 0x808080
|
||||
graph_node_style = "filled"
|
||||
graph_node_shape = "diamond"
|
||||
graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
|
||||
self.graph.add_node(graph_node)
|
||||
yield node.left
|
||||
yield node.right
|
||||
|
||||
def visit_packed_node_out(self, node):
|
||||
graph_node_id = str(id(node))
|
||||
graph_node = self.graph.get_node(graph_node_id)[0]
|
||||
for child in [node.left, node.right]:
|
||||
if child is not None:
|
||||
child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child))
|
||||
child_graph_node = self.graph.get_node(child_graph_node_id)[0]
|
||||
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
|
||||
else:
|
||||
#### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
|
||||
child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
|
||||
child_graph_node_style = "invis"
|
||||
child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
|
||||
child_edge_style = "invis"
|
||||
self.graph.add_node(child_graph_node)
|
||||
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
|
||||
|
||||
def visit_symbol_node_in(self, node):
|
||||
graph_node_id = str(id(node))
|
||||
graph_node_label = repr(node)
|
||||
graph_node_color = 0x808080
|
||||
graph_node_style = "\"filled\""
|
||||
if node.is_intermediate:
|
||||
graph_node_shape = "ellipse"
|
||||
else:
|
||||
graph_node_shape = "rectangle"
|
||||
graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
|
||||
self.graph.add_node(graph_node)
|
||||
return iter(node.children)
|
||||
|
||||
def visit_symbol_node_out(self, node):
|
||||
graph_node_id = str(id(node))
|
||||
graph_node = self.graph.get_node(graph_node_id)[0]
|
||||
for child in node.children:
|
||||
child_graph_node_id = str(id(child))
|
||||
child_graph_node = self.graph.get_node(child_graph_node_id)[0]
|
||||
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
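# Illustrative sketch (not part of the original file): writing a small SPPF to a PNG,
# as the class docstring suggests. Requires the optional `pydot` package plus Graphviz;
# import path assumes the standard `lark` package layout.
from lark import Lark
from lark.parsers.earley_forest import ForestToPyDotVisitor

forest = Lark('start: e\ne: e "+" e | NUMBER\n%import common.NUMBER',
              parser='earley', ambiguity='forest').parse("1+2+3")
ForestToPyDotVisitor().visit(forest, "sppf.png")  # writes sppf.png to the working directory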
|
||||
203
ccxt/static_dependencies/lark/parsers/grammar_analysis.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"Provides for superficial grammar analysis."
|
||||
|
||||
from collections import Counter, defaultdict
|
||||
from typing import List, Dict, Iterator, FrozenSet, Set
|
||||
|
||||
from ..utils import bfs, fzset, classify
|
||||
from ..exceptions import GrammarError
|
||||
from ..grammar import Rule, Terminal, NonTerminal, Symbol
|
||||
from ..common import ParserConf
|
||||
|
||||
|
||||
class RulePtr:
|
||||
__slots__ = ('rule', 'index')
|
||||
rule: Rule
|
||||
index: int
|
||||
|
||||
def __init__(self, rule: Rule, index: int):
|
||||
assert isinstance(rule, Rule)
|
||||
assert index <= len(rule.expansion)
|
||||
self.rule = rule
|
||||
self.index = index
|
||||
|
||||
def __repr__(self):
|
||||
before = [x.name for x in self.rule.expansion[:self.index]]
|
||||
after = [x.name for x in self.rule.expansion[self.index:]]
|
||||
return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))
|
||||
|
||||
@property
|
||||
def next(self) -> Symbol:
|
||||
return self.rule.expansion[self.index]
|
||||
|
||||
def advance(self, sym: Symbol) -> 'RulePtr':
|
||||
assert self.next == sym
|
||||
return RulePtr(self.rule, self.index+1)
|
||||
|
||||
@property
|
||||
def is_satisfied(self) -> bool:
|
||||
return self.index == len(self.rule.expansion)
|
||||
|
||||
def __eq__(self, other) -> bool:
|
||||
if not isinstance(other, RulePtr):
|
||||
return NotImplemented
|
||||
return self.rule == other.rule and self.index == other.index
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return hash((self.rule, self.index))
|
||||
|
||||
|
||||
State = FrozenSet[RulePtr]
|
||||
|
||||
# state generation ensures no duplicate LR0ItemSets
|
||||
class LR0ItemSet:
|
||||
__slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')
|
||||
|
||||
kernel: State
|
||||
closure: State
|
||||
transitions: Dict[Symbol, 'LR0ItemSet']
|
||||
lookaheads: Dict[Symbol, Set[Rule]]
|
||||
|
||||
def __init__(self, kernel, closure):
|
||||
self.kernel = fzset(kernel)
|
||||
self.closure = fzset(closure)
|
||||
self.transitions = {}
|
||||
self.lookaheads = defaultdict(set)
|
||||
|
||||
def __repr__(self):
|
||||
return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))
|
||||
|
||||
|
||||
def update_set(set1, set2):
|
||||
if not set2 or set1 > set2:
|
||||
return False
|
||||
|
||||
copy = set(set1)
|
||||
set1 |= set2
|
||||
return set1 != copy
|
||||
|
||||
def calculate_sets(rules):
|
||||
"""Calculate FOLLOW sets.
|
||||
|
||||
Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
|
||||
symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
|
||||
|
||||
# foreach grammar rule X ::= Y(1) ... Y(k)
|
||||
# if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
|
||||
# NULLABLE = NULLABLE union {X}
|
||||
# for i = 1 to k
|
||||
# if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
|
||||
# FIRST(X) = FIRST(X) union FIRST(Y(i))
|
||||
# for j = i+1 to k
|
||||
# if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
|
||||
# FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
|
||||
# if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
|
||||
# FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
|
||||
# until none of NULLABLE,FIRST,FOLLOW changed in last iteration
|
||||
|
||||
NULLABLE = set()
|
||||
FIRST = {}
|
||||
FOLLOW = {}
|
||||
for sym in symbols:
|
||||
FIRST[sym]={sym} if sym.is_term else set()
|
||||
FOLLOW[sym]=set()
|
||||
|
||||
# Calculate NULLABLE and FIRST
|
||||
changed = True
|
||||
while changed:
|
||||
changed = False
|
||||
|
||||
for rule in rules:
|
||||
if set(rule.expansion) <= NULLABLE:
|
||||
if update_set(NULLABLE, {rule.origin}):
|
||||
changed = True
|
||||
|
||||
for i, sym in enumerate(rule.expansion):
|
||||
if set(rule.expansion[:i]) <= NULLABLE:
|
||||
if update_set(FIRST[rule.origin], FIRST[sym]):
|
||||
changed = True
|
||||
else:
|
||||
break
|
||||
|
||||
# Calculate FOLLOW
|
||||
changed = True
|
||||
while changed:
|
||||
changed = False
|
||||
|
||||
for rule in rules:
|
||||
for i, sym in enumerate(rule.expansion):
|
||||
if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
|
||||
if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
|
||||
changed = True
|
||||
|
||||
for j in range(i+1, len(rule.expansion)):
|
||||
if set(rule.expansion[i+1:j]) <= NULLABLE:
|
||||
if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
|
||||
changed = True
|
||||
|
||||
return FIRST, FOLLOW, NULLABLE
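# Illustrative sketch (not part of the original file): the fixed point above applied to
# the toy grammar  s -> a b ;  a -> "A" | <empty> ;  b -> "B".  Building Rule objects by
# hand like this is an internal API, used here only to show the expected sets.
from lark.grammar import Rule, Terminal, NonTerminal
from lark.parsers.grammar_analysis import calculate_sets

s, a, b = NonTerminal('s'), NonTerminal('a'), NonTerminal('b')
A, B = Terminal('A'), Terminal('B')
rules = [Rule(s, [a, b]), Rule(a, [A]), Rule(a, []), Rule(b, [B])]

FIRST, FOLLOW, NULLABLE = calculate_sets(rules)
assert NULLABLE == {a}      # only `a` derives the empty string
assert FIRST[s] == {A, B}   # `a` is nullable, so FIRST(b) leaks into FIRST(s)
assert FOLLOW[a] == {B}     # whatever starts `b` can follow `a`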
|
||||
|
||||
|
||||
class GrammarAnalyzer:
|
||||
def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
|
||||
self.debug = debug
|
||||
self.strict = strict
|
||||
|
||||
root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
|
||||
for start in parser_conf.start}
|
||||
|
||||
rules = parser_conf.rules + list(root_rules.values())
|
||||
self.rules_by_origin: Dict[NonTerminal, List[Rule]] = classify(rules, lambda r: r.origin)
|
||||
|
||||
if len(rules) != len(set(rules)):
|
||||
duplicates = [item for item, count in Counter(rules).items() if count > 1]
|
||||
raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))
|
||||
|
||||
for r in rules:
|
||||
for sym in r.expansion:
|
||||
if not (sym.is_term or sym in self.rules_by_origin):
|
||||
raise GrammarError("Using an undefined rule: %s" % sym)
|
||||
|
||||
self.start_states = {start: self.expand_rule(root_rule.origin)
|
||||
for start, root_rule in root_rules.items()}
|
||||
|
||||
self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
|
||||
for start, root_rule in root_rules.items()}
|
||||
|
||||
lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
|
||||
for start in parser_conf.start}
|
||||
|
||||
lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
|
||||
assert(len(lr0_rules) == len(set(lr0_rules)))
|
||||
|
||||
self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)
|
||||
|
||||
# cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
|
||||
self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
|
||||
for start, root_rule in lr0_root_rules.items()}
|
||||
|
||||
self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
|
||||
|
||||
def expand_rule(self, source_rule: NonTerminal, rules_by_origin=None) -> State:
|
||||
"Returns all init_ptrs accessible by rule (recursive)"
|
||||
|
||||
if rules_by_origin is None:
|
||||
rules_by_origin = self.rules_by_origin
|
||||
|
||||
init_ptrs = set()
|
||||
def _expand_rule(rule: NonTerminal) -> Iterator[NonTerminal]:
|
||||
assert not rule.is_term, rule
|
||||
|
||||
for r in rules_by_origin[rule]:
|
||||
init_ptr = RulePtr(r, 0)
|
||||
init_ptrs.add(init_ptr)
|
||||
|
||||
if r.expansion: # if not empty rule
|
||||
new_r = init_ptr.next
|
||||
if not new_r.is_term:
|
||||
assert isinstance(new_r, NonTerminal)
|
||||
yield new_r
|
||||
|
||||
for _ in bfs([source_rule], _expand_rule):
|
||||
pass
|
||||
|
||||
return fzset(init_ptrs)
|
||||
332
ccxt/static_dependencies/lark/parsers/lalr_analysis.py
Normal file
@@ -0,0 +1,332 @@
|
||||
"""This module builds a LALR(1) transition-table for lalr_parser.py
|
||||
|
||||
For now, shift/reduce conflicts are automatically resolved as shifts.
|
||||
"""
|
||||
|
||||
# Author: Erez Shinan (2017)
|
||||
# Email : erezshin@gmail.com
|
||||
|
||||
from typing import Dict, Set, Iterator, Tuple, List, TypeVar, Generic
|
||||
from collections import defaultdict
|
||||
|
||||
from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
|
||||
from ..exceptions import GrammarError
|
||||
|
||||
from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet, RulePtr, State
|
||||
from ..grammar import Rule, Symbol
|
||||
from ..common import ParserConf
|
||||
|
||||
###{standalone
|
||||
|
||||
class Action:
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
def __str__(self):
|
||||
return self.name
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
Shift = Action('Shift')
|
||||
Reduce = Action('Reduce')
|
||||
|
||||
StateT = TypeVar("StateT")
|
||||
|
||||
class ParseTableBase(Generic[StateT]):
|
||||
states: Dict[StateT, Dict[str, Tuple]]
|
||||
start_states: Dict[str, StateT]
|
||||
end_states: Dict[str, StateT]
|
||||
|
||||
def __init__(self, states, start_states, end_states):
|
||||
self.states = states
|
||||
self.start_states = start_states
|
||||
self.end_states = end_states
|
||||
|
||||
def serialize(self, memo):
|
||||
tokens = Enumerator()
|
||||
|
||||
states = {
|
||||
state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
|
||||
for token, (action, arg) in actions.items()}
|
||||
for state, actions in self.states.items()
|
||||
}
|
||||
|
||||
return {
|
||||
'tokens': tokens.reversed(),
|
||||
'states': states,
|
||||
'start_states': self.start_states,
|
||||
'end_states': self.end_states,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def deserialize(cls, data, memo):
|
||||
tokens = data['tokens']
|
||||
states = {
|
||||
state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
|
||||
for token, (action, arg) in actions.items()}
|
||||
for state, actions in data['states'].items()
|
||||
}
|
||||
return cls(states, data['start_states'], data['end_states'])
|
||||
|
||||
class ParseTable(ParseTableBase['State']):
|
||||
"""Parse-table whose key is State, i.e. set[RulePtr]
|
||||
|
||||
Slower than IntParseTable, but useful for debugging
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class IntParseTable(ParseTableBase[int]):
|
||||
"""Parse-table whose key is int. Best for performance."""
|
||||
|
||||
@classmethod
|
||||
def from_ParseTable(cls, parse_table: ParseTable):
|
||||
enum = list(parse_table.states)
|
||||
state_to_idx: Dict['State', int] = {s:i for i,s in enumerate(enum)}
|
||||
int_states = {}
|
||||
|
||||
for s, la in parse_table.states.items():
|
||||
la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
|
||||
for k,v in la.items()}
|
||||
int_states[ state_to_idx[s] ] = la
|
||||
|
||||
|
||||
start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
|
||||
end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
|
||||
return cls(int_states, start_states, end_states)
|
||||
|
||||
###}
|
||||
|
||||
|
||||
# digraph and traverse, see The Theory and Practice of Compiler Writing
|
||||
|
||||
# computes F(x) = G(x) union (union { G(y) | x R y })
|
||||
# X: nodes
|
||||
# R: relation (function mapping node -> list of nodes that satisfy the relation)
|
||||
# G: set valued function
|
||||
def digraph(X, R, G):
|
||||
F = {}
|
||||
S = []
|
||||
N = dict.fromkeys(X, 0)
|
||||
for x in X:
|
||||
# this is always true for the first iteration, but N[x] may be updated in traverse below
|
||||
if N[x] == 0:
|
||||
traverse(x, S, N, X, R, G, F)
|
||||
return F
|
||||
|
||||
# x: single node
|
||||
# S: stack
|
||||
# N: weights
|
||||
# X: nodes
|
||||
# R: relation (see above)
|
||||
# G: set valued function
|
||||
# F: set valued function we are computing (map of input -> output)
|
||||
def traverse(x, S, N, X, R, G, F):
|
||||
S.append(x)
|
||||
d = len(S)
|
||||
N[x] = d
|
||||
F[x] = G[x]
|
||||
for y in R[x]:
|
||||
if N[y] == 0:
|
||||
traverse(y, S, N, X, R, G, F)
|
||||
n_x = N[x]
|
||||
assert(n_x > 0)
|
||||
n_y = N[y]
|
||||
assert(n_y != 0)
|
||||
if (n_y > 0) and (n_y < n_x):
|
||||
N[x] = n_y
|
||||
F[x].update(F[y])
|
||||
if N[x] == d:
|
||||
f_x = F[x]
|
||||
while True:
|
||||
z = S.pop()
|
||||
N[z] = -1
|
||||
F[z] = f_x
|
||||
if z == x:
|
||||
break
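# Illustrative sketch (not part of the original file): digraph() on a tiny relation.
# Nodes 'a' -> 'b' -> 'c' -> 'a' form one strongly connected component, so each node
# ends up with the union of all three G-sets.
from lark.parsers.lalr_analysis import digraph

X = ['a', 'b', 'c']
R = {'a': ['b'], 'b': ['c'], 'c': ['a']}
G = {'a': {1}, 'b': {2}, 'c': {3}}
assert digraph(X, R, G) == {'a': {1, 2, 3}, 'b': {1, 2, 3}, 'c': {1, 2, 3}}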
|
||||
|
||||
|
||||
class LALR_Analyzer(GrammarAnalyzer):
|
||||
lr0_itemsets: Set[LR0ItemSet]
|
||||
nonterminal_transitions: List[Tuple[LR0ItemSet, Symbol]]
|
||||
lookback: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Rule]]]
|
||||
includes: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]]
|
||||
reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]]
|
||||
directly_reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Symbol]]
|
||||
|
||||
|
||||
def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
|
||||
GrammarAnalyzer.__init__(self, parser_conf, debug, strict)
|
||||
self.nonterminal_transitions = []
|
||||
self.directly_reads = defaultdict(set)
|
||||
self.reads = defaultdict(set)
|
||||
self.includes = defaultdict(set)
|
||||
self.lookback = defaultdict(set)
|
||||
|
||||
|
||||
def compute_lr0_states(self) -> None:
|
||||
self.lr0_itemsets = set()
|
||||
# map of kernels to LR0ItemSets
|
||||
cache: Dict['State', LR0ItemSet] = {}
|
||||
|
||||
def step(state: LR0ItemSet) -> Iterator[LR0ItemSet]:
|
||||
_, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)
|
||||
|
||||
d = classify(unsat, lambda rp: rp.next)
|
||||
for sym, rps in d.items():
|
||||
kernel = fzset({rp.advance(sym) for rp in rps})
|
||||
new_state = cache.get(kernel, None)
|
||||
if new_state is None:
|
||||
closure = set(kernel)
|
||||
for rp in kernel:
|
||||
if not rp.is_satisfied and not rp.next.is_term:
|
||||
closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
|
||||
new_state = LR0ItemSet(kernel, closure)
|
||||
cache[kernel] = new_state
|
||||
|
||||
state.transitions[sym] = new_state
|
||||
yield new_state
|
||||
|
||||
self.lr0_itemsets.add(state)
|
||||
|
||||
for _ in bfs(self.lr0_start_states.values(), step):
|
||||
pass
|
||||
|
||||
def compute_reads_relations(self):
|
||||
# handle start state
|
||||
for root in self.lr0_start_states.values():
|
||||
assert(len(root.kernel) == 1)
|
||||
for rp in root.kernel:
|
||||
assert(rp.index == 0)
|
||||
self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
|
||||
|
||||
for state in self.lr0_itemsets:
|
||||
seen = set()
|
||||
for rp in state.closure:
|
||||
if rp.is_satisfied:
|
||||
continue
|
||||
s = rp.next
|
||||
# skip if s is not a nonterminal
|
||||
if s not in self.lr0_rules_by_origin:
|
||||
continue
|
||||
if s in seen:
|
||||
continue
|
||||
seen.add(s)
|
||||
nt = (state, s)
|
||||
self.nonterminal_transitions.append(nt)
|
||||
dr = self.directly_reads[nt]
|
||||
r = self.reads[nt]
|
||||
next_state = state.transitions[s]
|
||||
for rp2 in next_state.closure:
|
||||
if rp2.is_satisfied:
|
||||
continue
|
||||
s2 = rp2.next
|
||||
# if s2 is a terminal
|
||||
if s2 not in self.lr0_rules_by_origin:
|
||||
dr.add(s2)
|
||||
if s2 in self.NULLABLE:
|
||||
r.add((next_state, s2))
|
||||
|
||||
def compute_includes_lookback(self):
|
||||
for nt in self.nonterminal_transitions:
|
||||
state, nonterminal = nt
|
||||
includes = []
|
||||
lookback = self.lookback[nt]
|
||||
for rp in state.closure:
|
||||
if rp.rule.origin != nonterminal:
|
||||
continue
|
||||
# traverse the states for rp(.rule)
|
||||
state2 = state
|
||||
for i in range(rp.index, len(rp.rule.expansion)):
|
||||
s = rp.rule.expansion[i]
|
||||
nt2 = (state2, s)
|
||||
state2 = state2.transitions[s]
|
||||
if nt2 not in self.reads:
|
||||
continue
|
||||
for j in range(i + 1, len(rp.rule.expansion)):
|
||||
if rp.rule.expansion[j] not in self.NULLABLE:
|
||||
break
|
||||
else:
|
||||
includes.append(nt2)
|
||||
# state2 is at the final state for rp.rule
|
||||
if rp.index == 0:
|
||||
for rp2 in state2.closure:
|
||||
if (rp2.rule == rp.rule) and rp2.is_satisfied:
|
||||
lookback.add((state2, rp2.rule))
|
||||
for nt2 in includes:
|
||||
self.includes[nt2].add(nt)
|
||||
|
||||
def compute_lookaheads(self):
|
||||
read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
|
||||
follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
|
||||
|
||||
for nt, lookbacks in self.lookback.items():
|
||||
for state, rule in lookbacks:
|
||||
for s in follow_sets[nt]:
|
||||
state.lookaheads[s].add(rule)
|
||||
|
||||
def compute_lalr1_states(self) -> None:
|
||||
m: Dict[LR0ItemSet, Dict[str, Tuple]] = {}
|
||||
reduce_reduce = []
|
||||
for itemset in self.lr0_itemsets:
|
||||
actions: Dict[Symbol, Tuple] = {la: (Shift, next_state.closure)
|
||||
for la, next_state in itemset.transitions.items()}
|
||||
for la, rules in itemset.lookaheads.items():
|
||||
if len(rules) > 1:
|
||||
# Try to resolve conflict based on priority
|
||||
p = [(r.options.priority or 0, r) for r in rules]
|
||||
p.sort(key=lambda r: r[0], reverse=True)
|
||||
best, second_best = p[:2]
|
||||
if best[0] > second_best[0]:
|
||||
rules = {best[1]}
|
||||
else:
|
||||
reduce_reduce.append((itemset, la, rules))
|
||||
continue
|
||||
|
||||
rule ,= rules
|
||||
if la in actions:
|
||||
if self.strict:
|
||||
raise GrammarError(f"Shift/Reduce conflict for terminal {la.name}. [strict-mode]\n ")
|
||||
elif self.debug:
|
||||
logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
|
||||
logger.warning(' * %s', rule)
|
||||
else:
|
||||
logger.debug('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
|
||||
logger.debug(' * %s', rule)
|
||||
else:
|
||||
actions[la] = (Reduce, rule)
|
||||
m[itemset] = { k.name: v for k, v in actions.items() }
|
||||
|
||||
if reduce_reduce:
|
||||
msgs = []
|
||||
for itemset, la, rules in reduce_reduce:
|
||||
msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
|
||||
if self.debug:
|
||||
msg += '\n collision occurred in state: {%s\n }' % ''.join(['\n\t' + str(x) for x in itemset.closure])
|
||||
msgs.append(msg)
|
||||
raise GrammarError('\n\n'.join(msgs))
|
||||
|
||||
states = { k.closure: v for k, v in m.items() }
|
||||
|
||||
# compute end states
|
||||
end_states: Dict[str, 'State'] = {}
|
||||
for state in states:
|
||||
for rp in state:
|
||||
for start in self.lr0_start_states:
|
||||
if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
|
||||
assert start not in end_states
|
||||
end_states[start] = state
|
||||
|
||||
start_states = { start: state.closure for start, state in self.lr0_start_states.items() }
|
||||
_parse_table = ParseTable(states, start_states, end_states)
|
||||
|
||||
if self.debug:
|
||||
self.parse_table = _parse_table
|
||||
else:
|
||||
self.parse_table = IntParseTable.from_ParseTable(_parse_table)
|
||||
|
||||
def compute_lalr(self):
|
||||
self.compute_lr0_states()
|
||||
self.compute_reads_relations()
|
||||
self.compute_includes_lookback()
|
||||
self.compute_lookaheads()
|
||||
self.compute_lalr1_states()
|
||||
158
ccxt/static_dependencies/lark/parsers/lalr_interactive_parser.py
Normal file
@@ -0,0 +1,158 @@
|
||||
# This module provides a LALR interactive parser, which is used for debugging and error handling
|
||||
|
||||
from typing import Iterator, List
|
||||
from copy import copy
|
||||
import warnings
|
||||
|
||||
from ..exceptions import UnexpectedToken
|
||||
from ..lexer import Token, LexerThread
|
||||
from .lalr_parser_state import ParserState
|
||||
|
||||
###{standalone
|
||||
|
||||
class InteractiveParser:
|
||||
"""InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.
|
||||
|
||||
For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
|
||||
"""
|
||||
def __init__(self, parser, parser_state: ParserState, lexer_thread: LexerThread):
|
||||
self.parser = parser
|
||||
self.parser_state = parser_state
|
||||
self.lexer_thread = lexer_thread
|
||||
self.result = None
|
||||
|
||||
@property
|
||||
def lexer_state(self) -> LexerThread:
|
||||
warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning)
|
||||
return self.lexer_thread
|
||||
|
||||
def feed_token(self, token: Token):
|
||||
"""Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.
|
||||
|
||||
Note that ``token`` has to be an instance of ``Token``.
|
||||
"""
|
||||
return self.parser_state.feed_token(token, token.type == '$END')
|
||||
|
||||
def iter_parse(self) -> Iterator[Token]:
|
||||
"""Step through the different stages of the parse, by reading tokens from the lexer
|
||||
and feeding them to the parser, one per iteration.
|
||||
|
||||
Returns an iterator of the tokens it encounters.
|
||||
|
||||
When the parse is over, the resulting tree can be found in ``InteractiveParser.result``.
|
||||
"""
|
||||
for token in self.lexer_thread.lex(self.parser_state):
|
||||
yield token
|
||||
self.result = self.feed_token(token)
|
||||
|
||||
def exhaust_lexer(self) -> List[Token]:
|
||||
"""Try to feed the rest of the lexer state into the interactive parser.
|
||||
|
||||
Note that this modifies the instance in place and does not feed an '$END' Token
|
||||
"""
|
||||
return list(self.iter_parse())
|
||||
|
||||
|
||||
def feed_eof(self, last_token=None):
|
||||
"""Feed a '$END' Token. Borrows from 'last_token' if given."""
|
||||
eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1)
|
||||
return self.feed_token(eof)
|
||||
|
||||
|
||||
def __copy__(self):
|
||||
"""Create a new interactive parser with a separate state.
|
||||
|
||||
Calls to feed_token() won't affect the old instance, and vice-versa.
|
||||
"""
|
||||
return self.copy()
|
||||
|
||||
def copy(self, deepcopy_values=True):
|
||||
return type(self)(
|
||||
self.parser,
|
||||
self.parser_state.copy(deepcopy_values=deepcopy_values),
|
||||
copy(self.lexer_thread),
|
||||
)
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, InteractiveParser):
|
||||
return False
|
||||
|
||||
return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread
|
||||
|
||||
def as_immutable(self):
|
||||
"""Convert to an ``ImmutableInteractiveParser``."""
|
||||
p = copy(self)
|
||||
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread)
|
||||
|
||||
def pretty(self):
|
||||
"""Print the output of ``choices()`` in a way that's easier to read."""
|
||||
out = ["Parser choices:"]
|
||||
for k, v in self.choices().items():
|
||||
out.append('\t- %s -> %r' % (k, v))
|
||||
out.append('stack size: %s' % len(self.parser_state.state_stack))
|
||||
return '\n'.join(out)
|
||||
|
||||
def choices(self):
|
||||
"""Returns a dictionary of token types, matched to their action in the parser.
|
||||
|
||||
Only returns token types that are accepted by the current state.
|
||||
|
||||
Updated by ``feed_token()``.
|
||||
"""
|
||||
return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]
|
||||
|
||||
def accepts(self):
|
||||
"""Returns the set of possible tokens that will advance the parser into a new valid state."""
|
||||
accepts = set()
|
||||
conf_no_callbacks = copy(self.parser_state.parse_conf)
|
||||
# We don't want to call callbacks here since those might have arbitrary side effects
|
||||
# and are unnecessarily slow.
|
||||
conf_no_callbacks.callbacks = {}
|
||||
for t in self.choices():
|
||||
if t.isupper(): # is terminal?
|
||||
new_cursor = self.copy(deepcopy_values=False)
|
||||
new_cursor.parser_state.parse_conf = conf_no_callbacks
|
||||
try:
|
||||
new_cursor.feed_token(self.lexer_thread._Token(t, ''))
|
||||
except UnexpectedToken:
|
||||
pass
|
||||
else:
|
||||
accepts.add(t)
|
||||
return accepts
|
||||
|
||||
def resume_parse(self):
|
||||
"""Resume automated parsing from the current state.
|
||||
"""
|
||||
return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_thread.state.last_token)
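# Illustrative sketch (not part of the original file): driving the parser one token at
# a time via Lark.parse_interactive(), which returns an InteractiveParser. Assumes the
# standard `lark` package layout.
from lark import Lark

parser = Lark('start: WORD "," WORD\n%import common.WORD', parser='lalr')
ip = parser.parse_interactive("hello,world")

for token in ip.iter_parse():        # feed tokens one by one
    print(token.type, ip.accepts())  # terminals that could legally come next
tree = ip.feed_eof()                 # finish the parse with an explicit '$END'
print(tree.pretty())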
|
||||
|
||||
|
||||
|
||||
class ImmutableInteractiveParser(InteractiveParser):
|
||||
"""Same as ``InteractiveParser``, but operations create a new instance instead
|
||||
of changing it in-place.
|
||||
"""
|
||||
|
||||
result = None
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.parser_state, self.lexer_thread))
|
||||
|
||||
def feed_token(self, token):
|
||||
c = copy(self)
|
||||
c.result = InteractiveParser.feed_token(c, token)
|
||||
return c
|
||||
|
||||
def exhaust_lexer(self):
|
||||
"""Try to feed the rest of the lexer state into the parser.
|
||||
|
||||
Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
|
||||
cursor = self.as_mutable()
|
||||
cursor.exhaust_lexer()
|
||||
return cursor.as_immutable()
|
||||
|
||||
def as_mutable(self):
|
||||
"""Convert to an ``InteractiveParser``."""
|
||||
p = copy(self)
|
||||
return InteractiveParser(p.parser, p.parser_state, p.lexer_thread)
|
||||
|
||||
###}
|
||||
122
ccxt/static_dependencies/lark/parsers/lalr_parser.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""This module implements a LALR(1) Parser
|
||||
"""
|
||||
# Author: Erez Shinan (2017)
|
||||
# Email : erezshin@gmail.com
|
||||
from typing import Dict, Any, Optional
|
||||
from ..lexer import Token, LexerThread
|
||||
from ..utils import Serialize
|
||||
from ..common import ParserConf, ParserCallbacks
|
||||
|
||||
from .lalr_analysis import LALR_Analyzer, IntParseTable, ParseTableBase
|
||||
from .lalr_interactive_parser import InteractiveParser
|
||||
from ..exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
|
||||
from .lalr_parser_state import ParserState, ParseConf
|
||||
|
||||
###{standalone
|
||||
|
||||
class LALR_Parser(Serialize):
|
||||
def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
|
||||
analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
|
||||
analysis.compute_lalr()
|
||||
callbacks = parser_conf.callbacks
|
||||
|
||||
self._parse_table = analysis.parse_table
|
||||
self.parser_conf = parser_conf
|
||||
self.parser = _Parser(analysis.parse_table, callbacks, debug)
|
||||
|
||||
@classmethod
|
||||
def deserialize(cls, data, memo, callbacks, debug=False):
|
||||
inst = cls.__new__(cls)
|
||||
inst._parse_table = IntParseTable.deserialize(data, memo)
|
||||
inst.parser = _Parser(inst._parse_table, callbacks, debug)
|
||||
return inst
|
||||
|
||||
def serialize(self, memo: Any = None) -> Dict[str, Any]:
|
||||
return self._parse_table.serialize(memo)
|
||||
|
||||
def parse_interactive(self, lexer: LexerThread, start: str):
|
||||
return self.parser.parse(lexer, start, start_interactive=True)
|
||||
|
||||
def parse(self, lexer, start, on_error=None):
|
||||
try:
|
||||
return self.parser.parse(lexer, start)
|
||||
except UnexpectedInput as e:
|
||||
if on_error is None:
|
||||
raise
|
||||
|
||||
while True:
|
||||
if isinstance(e, UnexpectedCharacters):
|
||||
s = e.interactive_parser.lexer_thread.state
|
||||
p = s.line_ctr.char_pos
|
||||
|
||||
if not on_error(e):
|
||||
raise e
|
||||
|
||||
if isinstance(e, UnexpectedCharacters):
|
||||
# If the user didn't change the character position, advance one character ourselves to avoid an infinite loop
|
||||
if p == s.line_ctr.char_pos:
|
||||
s.line_ctr.feed(s.text[p:p+1])
|
||||
|
||||
try:
|
||||
return e.interactive_parser.resume_parse()
|
||||
except UnexpectedToken as e2:
|
||||
if (isinstance(e, UnexpectedToken)
|
||||
and e.token.type == e2.token.type == '$END'
|
||||
and e.interactive_parser == e2.interactive_parser):
|
||||
# Prevent infinite loop
|
||||
raise e2
|
||||
e = e2
|
||||
except UnexpectedCharacters as e2:
|
||||
e = e2
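# Illustrative sketch (not part of the original file): the on_error hook implemented by
# the loop above, as seen from the public API. Returning True from the callback resumes
# parsing from the recovered interactive-parser state, which in this sketch effectively
# skips the offending token.
from lark import Lark, UnexpectedInput

parser = Lark('start: WORD "," WORD\n%import common.WORD', parser='lalr')

def skip_errors(e: UnexpectedInput) -> bool:
    print("recovering from", type(e).__name__)
    return True

tree = parser.parse("hello,,world", on_error=skip_errors)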
|
||||
|
||||
|
||||
class _Parser:
|
||||
parse_table: ParseTableBase
|
||||
callbacks: ParserCallbacks
|
||||
debug: bool
|
||||
|
||||
def __init__(self, parse_table: ParseTableBase, callbacks: ParserCallbacks, debug: bool=False):
|
||||
self.parse_table = parse_table
|
||||
self.callbacks = callbacks
|
||||
self.debug = debug
|
||||
|
||||
def parse(self, lexer: LexerThread, start: str, value_stack=None, state_stack=None, start_interactive=False):
|
||||
parse_conf = ParseConf(self.parse_table, self.callbacks, start)
|
||||
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
|
||||
if start_interactive:
|
||||
return InteractiveParser(self, parser_state, parser_state.lexer)
|
||||
return self.parse_from_state(parser_state)
|
||||
|
||||
|
||||
def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None):
|
||||
"""Run the main LALR parser loop
|
||||
|
||||
Parameters:
|
||||
state - the initial state. Changed in-place.
|
||||
last_token - Used only for line information in case of an empty lexer.
|
||||
"""
|
||||
try:
|
||||
token = last_token
|
||||
for token in state.lexer.lex(state):
|
||||
assert token is not None
|
||||
state.feed_token(token)
|
||||
|
||||
end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
|
||||
return state.feed_token(end_token, True)
|
||||
except UnexpectedInput as e:
|
||||
try:
|
||||
e.interactive_parser = InteractiveParser(self, state, state.lexer)
|
||||
except NameError:
|
||||
pass
|
||||
raise e
|
||||
except Exception as e:
|
||||
if self.debug:
|
||||
print("")
|
||||
print("STATE STACK DUMP")
|
||||
print("----------------")
|
||||
for i, s in enumerate(state.state_stack):
|
||||
print('%d)' % i , s)
|
||||
print("")
|
||||
|
||||
raise
|
||||
###}
|
||||
110
ccxt/static_dependencies/lark/parsers/lalr_parser_state.py
Normal file
@@ -0,0 +1,110 @@
|
||||
from copy import deepcopy, copy
|
||||
from typing import Dict, Any, Generic, List
|
||||
from ..lexer import Token, LexerThread
|
||||
from ..common import ParserCallbacks
|
||||
|
||||
from .lalr_analysis import Shift, ParseTableBase, StateT
|
||||
from ..exceptions import UnexpectedToken
|
||||
|
||||
###{standalone
|
||||
|
||||
class ParseConf(Generic[StateT]):
|
||||
__slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
|
||||
|
||||
parse_table: ParseTableBase[StateT]
|
||||
callbacks: ParserCallbacks
|
||||
start: str
|
||||
|
||||
start_state: StateT
|
||||
end_state: StateT
|
||||
states: Dict[StateT, Dict[str, tuple]]
|
||||
|
||||
def __init__(self, parse_table: ParseTableBase[StateT], callbacks: ParserCallbacks, start: str):
|
||||
self.parse_table = parse_table
|
||||
|
||||
self.start_state = self.parse_table.start_states[start]
|
||||
self.end_state = self.parse_table.end_states[start]
|
||||
self.states = self.parse_table.states
|
||||
|
||||
self.callbacks = callbacks
|
||||
self.start = start
|
||||
|
||||
class ParserState(Generic[StateT]):
|
||||
__slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
|
||||
|
||||
parse_conf: ParseConf[StateT]
|
||||
lexer: LexerThread
|
||||
state_stack: List[StateT]
|
||||
value_stack: list
|
||||
|
||||
def __init__(self, parse_conf: ParseConf[StateT], lexer: LexerThread, state_stack=None, value_stack=None):
|
||||
self.parse_conf = parse_conf
|
||||
self.lexer = lexer
|
||||
self.state_stack = state_stack or [self.parse_conf.start_state]
|
||||
self.value_stack = value_stack or []
|
||||
|
||||
@property
|
||||
def position(self) -> StateT:
|
||||
return self.state_stack[-1]
|
||||
|
||||
# Necessary for match_examples() to work
|
||||
def __eq__(self, other) -> bool:
|
||||
if not isinstance(other, ParserState):
|
||||
return NotImplemented
|
||||
return len(self.state_stack) == len(other.state_stack) and self.position == other.position
|
||||
|
||||
def __copy__(self):
|
||||
return self.copy()
|
||||
|
||||
def copy(self, deepcopy_values=True) -> 'ParserState[StateT]':
|
||||
return type(self)(
|
||||
self.parse_conf,
|
||||
self.lexer, # XXX copy
|
||||
copy(self.state_stack),
|
||||
deepcopy(self.value_stack) if deepcopy_values else copy(self.value_stack),
|
||||
)
|
||||
|
||||
def feed_token(self, token: Token, is_end=False) -> Any:
|
||||
state_stack = self.state_stack
|
||||
value_stack = self.value_stack
|
||||
states = self.parse_conf.states
|
||||
end_state = self.parse_conf.end_state
|
||||
callbacks = self.parse_conf.callbacks
|
||||
|
||||
while True:
|
||||
state = state_stack[-1]
|
||||
try:
|
||||
action, arg = states[state][token.type]
|
||||
except KeyError:
|
||||
expected = {s for s in states[state].keys() if s.isupper()}
|
||||
raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
|
||||
|
||||
assert arg != end_state
|
||||
|
||||
if action is Shift:
|
||||
# shift once and return
|
||||
assert not is_end
|
||||
state_stack.append(arg)
|
||||
value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
|
||||
return
|
||||
else:
|
||||
# reduce+shift as many times as necessary
|
||||
rule = arg
|
||||
size = len(rule.expansion)
|
||||
if size:
|
||||
s = value_stack[-size:]
|
||||
del state_stack[-size:]
|
||||
del value_stack[-size:]
|
||||
else:
|
||||
s = []
|
||||
|
||||
value = callbacks[rule](s) if callbacks else s
|
||||
|
||||
_action, new_state = states[state_stack[-1]][rule.origin.name]
|
||||
assert _action is Shift
|
||||
state_stack.append(new_state)
|
||||
value_stack.append(value)
|
||||
|
||||
if is_end and state_stack[-1] == end_state:
|
||||
return value_stack[-1]
|
||||
###}
|
||||
165
ccxt/static_dependencies/lark/parsers/xearley.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""This module implements an Earley parser with a dynamic lexer
|
||||
|
||||
The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
|
||||
https://www.sciencedirect.com/science/article/pii/S1571066108001497
|
||||
|
||||
That is probably the best reference for understanding the algorithm here.
|
||||
|
||||
The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
|
||||
is better documented here:
|
||||
http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
|
||||
|
||||
Instead of running a lexer beforehand, or using a costly char-by-char method, this parser
uses regular expressions by necessity, achieving high performance while maintaining all of
Earley's power in parsing any CFG.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, Optional, List, Any
|
||||
from collections import defaultdict
|
||||
|
||||
from ..tree import Tree
|
||||
from ..exceptions import UnexpectedCharacters
|
||||
from ..lexer import Token
|
||||
from ..grammar import Terminal
|
||||
from .earley import Parser as BaseParser
|
||||
from .earley_forest import TokenNode
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..common import LexerConf, ParserConf
|
||||
|
||||
class Parser(BaseParser):
|
||||
def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable,
|
||||
resolve_ambiguity: bool=True, complete_lex: bool=False, debug: bool=False,
|
||||
tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True):
|
||||
BaseParser.__init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity,
|
||||
debug, tree_class, ordered_sets)
|
||||
self.ignore = [Terminal(t) for t in lexer_conf.ignore]
|
||||
self.complete_lex = complete_lex
|
||||
|
||||
def _parse(self, stream, columns, to_scan, start_symbol=None):
|
||||
|
||||
def scan(i, to_scan):
|
||||
"""The core Earley Scanner.
|
||||
|
||||
This is a custom implementation of the scanner that uses the
|
||||
Lark lexer to match tokens. The scan list is built by the
|
||||
Earley predictor, based on the previously completed tokens.
|
||||
This ensures that at each phase of the parse we have a custom
|
||||
lexer context, allowing for more complex ambiguities."""
|
||||
|
||||
node_cache = {}
|
||||
|
||||
# 1) Loop over the expectations and ask the lexer to match.
# Since the regexp is forward-looking on the input stream, and we only
# want to process tokens at the point in the stream where they complete,
# we push all matches into a buffer (delayed_matches), to be held until
# a later parse step reaches that completion point in the input.
|
||||
for item in self.Set(to_scan):
|
||||
m = match(item.expect, stream, i)
|
||||
if m:
|
||||
t = Token(item.expect.name, m.group(0), i, text_line, text_column)
|
||||
delayed_matches[m.end()].append( (item, i, t) )
|
||||
|
||||
if self.complete_lex:
|
||||
s = m.group(0)
|
||||
for j in range(1, len(s)):
|
||||
m = match(item.expect, s[:-j])
|
||||
if m:
|
||||
t = Token(item.expect.name, m.group(0), i, text_line, text_column)
|
||||
delayed_matches[i+m.end()].append( (item, i, t) )
|
||||
|
||||
# XXX The following 3 lines were commented out for causing a bug. See issue #768
|
||||
# # Remove any items that successfully matched in this pass from the to_scan buffer.
|
||||
# # This ensures we don't carry over tokens that already matched, if we're ignoring below.
|
||||
# to_scan.remove(item)
|
||||
|
||||
# 3) Process any ignores. This is typically used for e.g. whitespace.
|
||||
# We carry over any unmatched items from the to_scan buffer to be matched again after
|
||||
# the ignore. This should allow us to use ignored symbols in non-terminals to implement
|
||||
# e.g. mandatory spacing.
|
||||
for x in self.ignore:
|
||||
m = match(x, stream, i)
|
||||
if m:
|
||||
# Carry over any items still in the scan buffer, to past the end of the ignored items.
|
||||
delayed_matches[m.end()].extend([(item, i, None) for item in to_scan ])
|
||||
|
||||
# If we're ignoring up to the end of the file, carry over the start symbol if it already completed.
|
||||
delayed_matches[m.end()].extend([(item, i, None) for item in columns[i] if item.is_complete and item.s == start_symbol])
|
||||
|
||||
next_to_scan = self.Set()
|
||||
next_set = self.Set()
|
||||
columns.append(next_set)
|
||||
transitives.append({})
|
||||
|
||||
## 4) Process Tokens from delayed_matches.
|
||||
# This is the core of the Earley scanner. Create an SPPF node for each Token,
|
||||
# and create the symbol node in the SPPF tree. Advance the item that completed,
|
||||
# and add the resulting new item to either the Earley set (for processing by the
|
||||
# completer/predictor) or the to_scan buffer for the next parse step.
|
||||
for item, start, token in delayed_matches[i+1]:
|
||||
if token is not None:
|
||||
token.end_line = text_line
|
||||
token.end_column = text_column + 1
|
||||
token.end_pos = i + 1
|
||||
|
||||
new_item = item.advance()
|
||||
label = (new_item.s, new_item.start, i + 1)
|
||||
token_node = TokenNode(token, terminals[token.type])
|
||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
|
||||
new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
|
||||
else:
|
||||
new_item = item
|
||||
|
||||
if new_item.expect in self.TERMINALS:
|
||||
# add (B ::= Aai+1.B, h, y) to Q'
|
||||
next_to_scan.add(new_item)
|
||||
else:
|
||||
# add (B ::= Aa+1.B, h, y) to Ei+1
|
||||
next_set.add(new_item)
|
||||
|
||||
del delayed_matches[i+1] # No longer needed, so unburden memory
|
||||
|
||||
if not next_set and not delayed_matches and not next_to_scan:
|
||||
considered_rules = list(sorted(to_scan, key=lambda key: key.rule.origin.name))
|
||||
raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan},
|
||||
set(to_scan), state=frozenset(i.s for i in to_scan),
|
||||
considered_rules=considered_rules
|
||||
)
|
||||
|
||||
return next_to_scan
|
||||
|
||||
|
||||
delayed_matches = defaultdict(list)
|
||||
match = self.term_matcher
|
||||
terminals = self.lexer_conf.terminals_by_name
|
||||
|
||||
# Cache for nodes & tokens created in a particular parse step.
|
||||
transitives = [{}]
|
||||
|
||||
text_line = 1
|
||||
text_column = 1
|
||||
|
||||
## The main Earley loop.
|
||||
# Run the Prediction/Completion cycle for any Items in the current Earley set.
|
||||
# Completions will be added to the SPPF tree, and predictions will be recursively
|
||||
# processed down to terminals/empty nodes to be added to the scanner for the next
|
||||
# step.
|
||||
i = 0
|
||||
for token in stream:
|
||||
self.predict_and_complete(i, to_scan, columns, transitives)
|
||||
|
||||
to_scan = scan(i, to_scan)
|
||||
|
||||
if token == '\n':
|
||||
text_line += 1
|
||||
text_column = 1
|
||||
else:
|
||||
text_column += 1
|
||||
i += 1
|
||||
|
||||
self.predict_and_complete(i, to_scan, columns, transitives)
|
||||
|
||||
## Column is now the final column in the parse.
|
||||
assert i == len(columns)-1
|
||||
return to_scan
|
||||
0
ccxt/static_dependencies/lark/py.typed
Normal file
107
ccxt/static_dependencies/lark/reconstruct.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""This is an experimental tool for reconstructing text from a shaped tree, based on a Lark grammar.
|
||||
"""
|
||||
|
||||
from typing import Dict, Callable, Iterable, Optional
|
||||
|
||||
from .lark import Lark
|
||||
from .tree import Tree, ParseTree
|
||||
from .visitors import Transformer_InPlace
|
||||
from .lexer import Token, PatternStr, TerminalDef
|
||||
from .grammar import Terminal, NonTerminal, Symbol
|
||||
|
||||
from .tree_matcher import TreeMatcher, is_discarded_terminal
|
||||
from .utils import is_id_continue
|
||||
|
||||
def is_iter_empty(i):
|
||||
try:
|
||||
_ = next(i)
|
||||
return False
|
||||
except StopIteration:
|
||||
return True
|
||||
|
||||
|
||||
class WriteTokensTransformer(Transformer_InPlace):
|
||||
"Inserts discarded tokens into their correct place, according to the rules of grammar"
|
||||
|
||||
tokens: Dict[str, TerminalDef]
|
||||
term_subs: Dict[str, Callable[[Symbol], str]]
|
||||
|
||||
def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
|
||||
self.tokens = tokens
|
||||
self.term_subs = term_subs
|
||||
|
||||
def __default__(self, data, children, meta):
|
||||
if not getattr(meta, 'match_tree', False):
|
||||
return Tree(data, children)
|
||||
|
||||
iter_args = iter(children)
|
||||
to_write = []
|
||||
for sym in meta.orig_expansion:
|
||||
if is_discarded_terminal(sym):
|
||||
try:
|
||||
v = self.term_subs[sym.name](sym)
|
||||
except KeyError:
|
||||
t = self.tokens[sym.name]
|
||||
if not isinstance(t.pattern, PatternStr):
|
||||
raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
|
||||
|
||||
v = t.pattern.value
|
||||
to_write.append(v)
|
||||
else:
|
||||
x = next(iter_args)
|
||||
if isinstance(x, list):
|
||||
to_write += x
|
||||
else:
|
||||
if isinstance(x, Token):
|
||||
assert Terminal(x.type) == sym, x
|
||||
else:
|
||||
assert NonTerminal(x.data) == sym, (sym, x)
|
||||
to_write.append(x)
|
||||
|
||||
assert is_iter_empty(iter_args)
|
||||
return to_write
|
||||
|
||||
|
||||
class Reconstructor(TreeMatcher):
|
||||
"""
|
||||
A Reconstructor that will, given a full parse Tree, generate source code.
|
||||
|
||||
Note:
|
||||
The reconstructor cannot generate values from regexps. If you need to produce tokens for
discarded regexp terminals, such as newlines, use `term_subs` to provide default values for them.
|
||||
|
||||
Parameters:
|
||||
parser: a Lark instance
|
||||
term_subs: a dictionary of [Terminal name as str] to [output text as str]
|
||||
"""
|
||||
|
||||
write_tokens: WriteTokensTransformer
|
||||
|
||||
def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
|
||||
TreeMatcher.__init__(self, parser)
|
||||
|
||||
self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
|
||||
|
||||
def _reconstruct(self, tree):
|
||||
unreduced_tree = self.match_tree(tree, tree.data)
|
||||
|
||||
res = self.write_tokens.transform(unreduced_tree)
|
||||
for item in res:
|
||||
if isinstance(item, Tree):
|
||||
# TODO use orig_expansion.rulename to support templates
|
||||
yield from self._reconstruct(item)
|
||||
else:
|
||||
yield item
|
||||
|
||||
def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
|
||||
x = self._reconstruct(tree)
|
||||
if postproc:
|
||||
x = postproc(x)
|
||||
y = []
|
||||
prev_item = ''
|
||||
for item in x:
|
||||
if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
|
||||
y.append(' ')
|
||||
y.append(item)
|
||||
prev_item = item
|
||||
return ''.join(y)
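# Illustrative sketch (not part of the original file): round-tripping text through a
# parse tree. Following the upstream examples, the parser is created with
# maybe_placeholders=False, which the tree matcher expects. Assumes the standard
# `lark` package layout.
from lark import Lark
from lark.reconstruct import Reconstructor

grammar = """
start: item ("," item)*
item: WORD
%import common.WORD
"""
parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
tree = parser.parse("foo,bar,baz")
print(Reconstructor(parser).reconstruct(tree))  # expected: foo,bar,baz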
|
||||
70
ccxt/static_dependencies/lark/tools/__init__.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import sys
|
||||
from argparse import ArgumentParser, FileType
|
||||
from textwrap import indent
|
||||
from logging import DEBUG, INFO, WARN, ERROR
|
||||
from typing import Optional
|
||||
import warnings
|
||||
|
||||
from lark import Lark, logger
|
||||
try:
|
||||
from interegular import logger as interegular_logger
|
||||
has_interegular = True
|
||||
except ImportError:
|
||||
has_interegular = False
|
||||
|
||||
lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options')
|
||||
|
||||
flags = [
|
||||
('d', 'debug'),
|
||||
'keep_all_tokens',
|
||||
'regex',
|
||||
'propagate_positions',
|
||||
'maybe_placeholders',
|
||||
'use_bytes'
|
||||
]
|
||||
|
||||
options = ['start', 'lexer']
|
||||
|
||||
lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times")
|
||||
lalr_argparser.add_argument('-s', '--start', action='append', default=[])
|
||||
lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('basic', 'contextual'))
|
||||
lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding='utf-8'), default=sys.stdout, help='the output file (default=stdout)')
|
||||
lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding='utf-8'), help='A valid .lark file')
|
||||
|
||||
for flag in flags:
|
||||
if isinstance(flag, tuple):
|
||||
options.append(flag[1])
|
||||
lalr_argparser.add_argument('-' + flag[0], '--' + flag[1], action='store_true')
|
||||
elif isinstance(flag, str):
|
||||
options.append(flag)
|
||||
lalr_argparser.add_argument('--' + flag, action='store_true')
|
||||
else:
|
||||
raise NotImplementedError("flags must only contain strings or tuples of strings")
|
||||
|
||||
|
||||
def build_lalr(namespace):
|
||||
logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)])
|
||||
if has_interegular:
|
||||
interegular_logger.setLevel(logger.getEffectiveLevel())
|
||||
if len(namespace.start) == 0:
|
||||
namespace.start.append('start')
|
||||
kwargs = {n: getattr(namespace, n) for n in options}
|
||||
return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out
|
||||
|
||||
|
||||
def showwarning_as_comment(message, category, filename, lineno, file=None, line=None):
|
||||
# Based on warnings._showwarnmsg_impl
|
||||
text = warnings.formatwarning(message, category, filename, lineno, line)
|
||||
text = indent(text, '# ')
|
||||
if file is None:
|
||||
file = sys.stderr
|
||||
if file is None:
|
||||
return
|
||||
try:
|
||||
file.write(text)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def make_warnings_comments():
|
||||
warnings.showwarning = showwarning_as_comment
|
||||
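For illustration (a sketch, not part of this file): `lalr_argparser` and `build_lalr` are meant to be reused by other command-line tools, e.g. a minimal tree printer:

    import sys
    from argparse import ArgumentParser
    from lark.tools import lalr_argparser, build_lalr

    def main():
        argparser = ArgumentParser(parents=[lalr_argparser], description='Parse stdin and print the tree')
        ns = argparser.parse_args()
        lark_inst, out = build_lalr(ns)
        out.write(lark_inst.parse(sys.stdin.read()).pretty())

    if __name__ == '__main__':
        main()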
202
ccxt/static_dependencies/lark/tools/nearley.py
Normal file
202
ccxt/static_dependencies/lark/tools/nearley.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"Converts Nearley grammars to Lark"
|
||||
|
||||
import os.path
|
||||
import sys
|
||||
import codecs
|
||||
import argparse
|
||||
|
||||
|
||||
from lark import Lark, Transformer, v_args
|
||||
|
||||
nearley_grammar = r"""
|
||||
start: (ruledef|directive)+
|
||||
|
||||
directive: "@" NAME (STRING|NAME)
|
||||
| "@" JS -> js_code
|
||||
ruledef: NAME "->" expansions
|
||||
| NAME REGEXP "->" expansions -> macro
|
||||
expansions: expansion ("|" expansion)*
|
||||
|
||||
expansion: expr+ js
|
||||
|
||||
?expr: item (":" /[+*?]/)?
|
||||
|
||||
?item: rule|string|regexp|null
|
||||
| "(" expansions ")"
|
||||
|
||||
rule: NAME
|
||||
string: STRING
|
||||
regexp: REGEXP
|
||||
null: "null"
|
||||
JS: /{%.*?%}/s
|
||||
js: JS?
|
||||
|
||||
NAME: /[a-zA-Z_$]\w*/
|
||||
COMMENT: /#[^\n]*/
|
||||
REGEXP: /\[.*?\]/
|
||||
|
||||
STRING: _STRING "i"?
|
||||
|
||||
%import common.ESCAPED_STRING -> _STRING
|
||||
%import common.WS
|
||||
%ignore WS
|
||||
%ignore COMMENT
|
||||
|
||||
"""
|
||||
|
||||
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='basic')
|
||||
|
||||
def _get_rulename(name):
|
||||
name = {'_': '_ws_maybe', '__': '_ws'}.get(name, name)
|
||||
return 'n_' + name.replace('$', '__DOLLAR__').lower()
|
||||
|
||||
@v_args(inline=True)
|
||||
class NearleyToLark(Transformer):
|
||||
def __init__(self):
|
||||
self._count = 0
|
||||
self.extra_rules = {}
|
||||
self.extra_rules_rev = {}
|
||||
self.alias_js_code = {}
|
||||
|
||||
def _new_function(self, code):
|
||||
name = 'alias_%d' % self._count
|
||||
self._count += 1
|
||||
|
||||
self.alias_js_code[name] = code
|
||||
return name
|
||||
|
||||
def _extra_rule(self, rule):
|
||||
if rule in self.extra_rules_rev:
|
||||
return self.extra_rules_rev[rule]
|
||||
|
||||
name = 'xrule_%d' % len(self.extra_rules)
|
||||
assert name not in self.extra_rules
|
||||
self.extra_rules[name] = rule
|
||||
self.extra_rules_rev[rule] = name
|
||||
return name
|
||||
|
||||
def rule(self, name):
|
||||
return _get_rulename(name)
|
||||
|
||||
def ruledef(self, name, exps):
|
||||
return '!%s: %s' % (_get_rulename(name), exps)
|
||||
|
||||
def expr(self, item, op):
|
||||
rule = '(%s)%s' % (item, op)
|
||||
return self._extra_rule(rule)
|
||||
|
||||
def regexp(self, r):
|
||||
return '/%s/' % r
|
||||
|
||||
def null(self):
|
||||
return ''
|
||||
|
||||
def string(self, s):
|
||||
return self._extra_rule(s)
|
||||
|
||||
def expansion(self, *x):
|
||||
x, js = x[:-1], x[-1]
|
||||
if js.children:
|
||||
js_code ,= js.children
|
||||
js_code = js_code[2:-2]
|
||||
alias = '-> ' + self._new_function(js_code)
|
||||
else:
|
||||
alias = ''
|
||||
return ' '.join(x) + alias
|
||||
|
||||
def expansions(self, *x):
|
||||
return '%s' % ('\n |'.join(x))
|
||||
|
||||
def start(self, *rules):
|
||||
return '\n'.join(filter(None, rules))
|
||||
|
||||
def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
|
||||
rule_defs = []
|
||||
|
||||
tree = nearley_grammar_parser.parse(g)
|
||||
for statement in tree.children:
|
||||
if statement.data == 'directive':
|
||||
directive, arg = statement.children
|
||||
if directive in ('builtin', 'include'):
|
||||
folder = builtin_path if directive == 'builtin' else folder_path
|
||||
path = os.path.join(folder, arg[1:-1])
|
||||
if path not in includes:
|
||||
includes.add(path)
|
||||
with codecs.open(path, encoding='utf8') as f:
|
||||
text = f.read()
|
||||
rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
|
||||
else:
|
||||
assert False, directive
|
||||
elif statement.data == 'js_code':
|
||||
code ,= statement.children
|
||||
code = code[2:-2]
|
||||
js_code.append(code)
|
||||
elif statement.data == 'macro':
|
||||
pass # TODO Add support for macros!
|
||||
elif statement.data == 'ruledef':
|
||||
rule_defs.append(n2l.transform(statement))
|
||||
else:
|
||||
raise Exception("Unknown statement: %s" % statement)
|
||||
|
||||
return rule_defs
|
||||
|
||||
|
||||
def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
|
||||
import js2py
|
||||
|
||||
emit_code = []
|
||||
def emit(x=None):
|
||||
if x:
|
||||
emit_code.append(x)
|
||||
emit_code.append('\n')
|
||||
|
||||
js_code = ['function id(x) {return x[0];}']
|
||||
n2l = NearleyToLark()
|
||||
rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
|
||||
lark_g = '\n'.join(rule_defs)
|
||||
lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
|
||||
|
||||
emit('from lark import Lark, Transformer')
|
||||
emit()
|
||||
emit('grammar = ' + repr(lark_g))
|
||||
emit()
|
||||
|
||||
for alias, code in n2l.alias_js_code.items():
|
||||
js_code.append('%s = (%s);' % (alias, code))
|
||||
|
||||
if es6:
|
||||
emit(js2py.translate_js6('\n'.join(js_code)))
|
||||
else:
|
||||
emit(js2py.translate_js('\n'.join(js_code)))
|
||||
emit('class TransformNearley(Transformer):')
|
||||
for alias in n2l.alias_js_code:
|
||||
emit(" %s = var.get('%s').to_python()" % (alias, alias))
|
||||
emit(" __default__ = lambda self, n, c, m: c if c else None")
|
||||
|
||||
emit()
|
||||
emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
|
||||
emit('def parse(text):')
|
||||
emit(' return TransformNearley().transform(parser.parse(text))')
|
||||
|
||||
return ''.join(emit_code)
|
||||
|
||||
def main(fn, start, nearley_lib, es6=False):
|
||||
with codecs.open(fn, encoding='utf8') as f:
|
||||
grammar = f.read()
|
||||
return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
|
||||
|
||||
def get_arg_parser():
|
||||
parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
|
||||
parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
|
||||
parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
|
||||
parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
|
||||
parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
|
||||
return parser
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = get_arg_parser()
|
||||
if len(sys.argv) == 1:
|
||||
parser.print_help(sys.stderr)
|
||||
sys.exit(1)
|
||||
args = parser.parse_args()
|
||||
print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))
|
||||
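The converter can also be driven programmatically via main() (an illustrative sketch; the paths and file names are hypothetical, and js2py must be installed since create_code_for_nearley_grammar translates the embedded JS):

    from lark.tools.nearley import main

    code = main('grammar.ne', 'main', '/path/to/nearley', es6=False)
    with open('parse_grammar.py', 'w') as f:
        f.write(code)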
32
ccxt/static_dependencies/lark/tools/serialize.py
Normal file
32
ccxt/static_dependencies/lark/tools/serialize.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import sys
|
||||
import json
|
||||
|
||||
from lark.grammar import Rule
|
||||
from lark.lexer import TerminalDef
|
||||
from lark.tools import lalr_argparser, build_lalr
|
||||
|
||||
import argparse
|
||||
|
||||
argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser],
|
||||
description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
|
||||
epilog='Look at the Lark documentation for more info on the options')
|
||||
|
||||
|
||||
def serialize(lark_inst, outfile):
|
||||
data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
|
||||
outfile.write('{\n')
|
||||
outfile.write(' "data": %s,\n' % json.dumps(data))
|
||||
outfile.write(' "memo": %s\n' % json.dumps(memo))
|
||||
outfile.write('}\n')
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv)==1:
|
||||
argparser.print_help(sys.stderr)
|
||||
sys.exit(1)
|
||||
ns = argparser.parse_args()
|
||||
serialize(*build_lalr(ns))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
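As a small sketch (the file name is hypothetical): the JSON written by serialize() is plain data with 'data' and 'memo' keys and can be inspected directly:

    import json

    # e.g. produced by: python -m lark.tools.serialize grammar.lark -o parser.json
    with open('parser.json') as f:
        payload = json.load(f)
    data, memo = payload['data'], payload['memo']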
196
ccxt/static_dependencies/lark/tools/standalone.py
Normal file
196
ccxt/static_dependencies/lark/tools/standalone.py
Normal file
@@ -0,0 +1,196 @@
|
||||
###{standalone
|
||||
#
|
||||
#
|
||||
# Lark Stand-alone Generator Tool
|
||||
# ----------------------------------
|
||||
# Generates a stand-alone LALR(1) parser
|
||||
#
|
||||
# Git: https://github.com/erezsh/lark
|
||||
# Author: Erez Shinan (erezshin@gmail.com)
|
||||
#
|
||||
#
|
||||
# >>> LICENSE
|
||||
#
|
||||
# This tool and its generated code use a separate license from Lark,
|
||||
# and are subject to the terms of the Mozilla Public License, v. 2.0.
|
||||
# If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# If you wish to purchase a commercial license for this tool and its
|
||||
# generated code, you may contact me via email or otherwise.
|
||||
#
|
||||
# If MPL2 is incompatible with your free or open-source project,
|
||||
# contact me and we'll work it out.
|
||||
#
|
||||
#
|
||||
|
||||
from copy import deepcopy
|
||||
from abc import ABC, abstractmethod
|
||||
from types import ModuleType
|
||||
from typing import (
|
||||
TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
|
||||
Union, Iterable, IO, TYPE_CHECKING, overload, Sequence,
|
||||
Pattern as REPattern, ClassVar, Set, Mapping
|
||||
)
|
||||
###}
|
||||
|
||||
import sys
|
||||
import token, tokenize
|
||||
import os
|
||||
from os import path
|
||||
from collections import defaultdict
|
||||
from functools import partial
|
||||
from argparse import ArgumentParser
|
||||
|
||||
import lark
|
||||
from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
|
||||
|
||||
|
||||
from lark.grammar import Rule
|
||||
from lark.lexer import TerminalDef
|
||||
|
||||
_dir = path.dirname(__file__)
|
||||
_larkdir = path.join(_dir, path.pardir)
|
||||
|
||||
|
||||
EXTRACT_STANDALONE_FILES = [
|
||||
'tools/standalone.py',
|
||||
'exceptions.py',
|
||||
'utils.py',
|
||||
'tree.py',
|
||||
'visitors.py',
|
||||
'grammar.py',
|
||||
'lexer.py',
|
||||
'common.py',
|
||||
'parse_tree_builder.py',
|
||||
'parsers/lalr_analysis.py',
|
||||
'parsers/lalr_parser_state.py',
|
||||
'parsers/lalr_parser.py',
|
||||
'parsers/lalr_interactive_parser.py',
|
||||
'parser_frontends.py',
|
||||
'lark.py',
|
||||
'indenter.py',
|
||||
]
|
||||
|
||||
def extract_sections(lines):
|
||||
section = None
|
||||
text = []
|
||||
sections = defaultdict(list)
|
||||
for line in lines:
|
||||
if line.startswith('###'):
|
||||
if line[3] == '{':
|
||||
section = line[4:].strip()
|
||||
elif line[3] == '}':
|
||||
sections[section] += text
|
||||
section = None
|
||||
text = []
|
||||
else:
|
||||
raise ValueError(line)
|
||||
elif section:
|
||||
text.append(line)
|
||||
|
||||
return {name: ''.join(text) for name, text in sections.items()}
|
||||
|
||||
|
||||
def strip_docstrings(line_gen):
|
||||
""" Strip comments and docstrings from a file.
|
||||
Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
|
||||
"""
|
||||
res = []
|
||||
|
||||
prev_toktype = token.INDENT
|
||||
last_lineno = -1
|
||||
last_col = 0
|
||||
|
||||
tokgen = tokenize.generate_tokens(line_gen)
|
||||
for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
|
||||
if slineno > last_lineno:
|
||||
last_col = 0
|
||||
if scol > last_col:
|
||||
res.append(" " * (scol - last_col))
|
||||
if toktype == token.STRING and prev_toktype == token.INDENT:
|
||||
# Docstring
|
||||
res.append("#--")
|
||||
elif toktype == tokenize.COMMENT:
|
||||
# Comment
|
||||
res.append("##\n")
|
||||
else:
|
||||
res.append(ttext)
|
||||
prev_toktype = toktype
|
||||
last_col = ecol
|
||||
last_lineno = elineno
|
||||
|
||||
return ''.join(res)
|
||||
|
||||
|
||||
def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
|
||||
if output is None:
|
||||
output = partial(print, file=out)
|
||||
|
||||
import pickle, zlib, base64
|
||||
def compressed_output(obj):
|
||||
s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
|
||||
c = zlib.compress(s)
|
||||
output(repr(base64.b64encode(c)))
|
||||
|
||||
def output_decompress(name):
|
||||
output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % locals())
|
||||
|
||||
output('# The file was automatically generated by Lark v%s' % lark.__version__)
|
||||
output('__version__ = "%s"' % lark.__version__)
|
||||
output()
|
||||
|
||||
for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
|
||||
with open(os.path.join(_larkdir, pyfile)) as f:
|
||||
code = extract_sections(f)['standalone']
|
||||
if i: # if not this file
|
||||
code = strip_docstrings(partial(next, iter(code.splitlines(True))))
|
||||
output(code)
|
||||
|
||||
data, m = lark_inst.memo_serialize([TerminalDef, Rule])
|
||||
output('import pickle, zlib, base64')
|
||||
if compress:
|
||||
output('DATA = (')
|
||||
compressed_output(data)
|
||||
output(')')
|
||||
output_decompress('DATA')
|
||||
output('MEMO = (')
|
||||
compressed_output(m)
|
||||
output(')')
|
||||
output_decompress('MEMO')
|
||||
else:
|
||||
output('DATA = (')
|
||||
output(data)
|
||||
output(')')
|
||||
output('MEMO = (')
|
||||
output(m)
|
||||
output(')')
|
||||
|
||||
|
||||
output('Shift = 0')
|
||||
output('Reduce = 1')
|
||||
output("def Lark_StandAlone(**kwargs):")
|
||||
output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")
|
||||
|
||||
|
||||
|
||||
|
||||
def main():
|
||||
make_warnings_comments()
|
||||
parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool",
|
||||
parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
|
||||
parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression")
|
||||
if len(sys.argv) == 1:
|
||||
parser.print_help(sys.stderr)
|
||||
sys.exit(1)
|
||||
ns = parser.parse_args()
|
||||
|
||||
lark_inst, out = build_lalr(ns)
|
||||
gen_standalone(lark_inst, out=out, compress=ns.compress)
|
||||
|
||||
ns.out.close()
|
||||
ns.grammar_file.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
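An illustrative sketch (file names assumed): gen_standalone() can also be called directly, e.g. from a build script, instead of going through the CLI:

    from lark import Lark
    from lark.tools.standalone import gen_standalone

    with open('grammar.lark') as g, open('my_parser.py', 'w') as out:
        gen_standalone(Lark(g, parser='lalr'), out=out)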
267
ccxt/static_dependencies/lark/tree.py
Normal file
267
ccxt/static_dependencies/lark/tree.py
Normal file
@@ -0,0 +1,267 @@
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
|
||||
from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .lexer import TerminalDef, Token
|
||||
try:
|
||||
import rich
|
||||
except ImportError:
|
||||
pass
|
||||
from typing import Literal
|
||||
|
||||
###{standalone
|
||||
|
||||
class Meta:
|
||||
|
||||
empty: bool
|
||||
line: int
|
||||
column: int
|
||||
start_pos: int
|
||||
end_line: int
|
||||
end_column: int
|
||||
end_pos: int
|
||||
orig_expansion: 'List[TerminalDef]'
|
||||
match_tree: bool
|
||||
|
||||
def __init__(self):
|
||||
self.empty = True
|
||||
|
||||
|
||||
_Leaf_T = TypeVar("_Leaf_T")
|
||||
Branch = Union[_Leaf_T, 'Tree[_Leaf_T]']
|
||||
|
||||
|
||||
class Tree(Generic[_Leaf_T]):
|
||||
"""The main tree class.
|
||||
|
||||
Creates a new tree, and stores "data" and "children" in attributes of the same name.
|
||||
Trees can be hashed and compared.
|
||||
|
||||
Parameters:
|
||||
data: The name of the rule or alias
|
||||
children: List of matched sub-rules and terminals
|
||||
meta: Line & Column numbers (if ``propagate_positions`` is enabled).
|
||||
meta attributes: (line, column, end_line, end_column, start_pos, end_pos,
|
||||
container_line, container_column, container_end_line, container_end_column)
|
||||
container_* attributes consider all symbols, including those that have been inlined in the tree.
|
||||
For example, in the rule 'a: _A B _C', the regular attributes will mark the start and end of B,
|
||||
but the container_* attributes will also include _A and _C in the range. However, rules that
|
||||
contain 'a' will consider it in full, including _A and _C for all attributes.
|
||||
"""
|
||||
|
||||
data: str
|
||||
children: 'List[Branch[_Leaf_T]]'
|
||||
|
||||
def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None:
|
||||
self.data = data
|
||||
self.children = children
|
||||
self._meta = meta
|
||||
|
||||
@property
|
||||
def meta(self) -> Meta:
|
||||
if self._meta is None:
|
||||
self._meta = Meta()
|
||||
return self._meta
|
||||
|
||||
def __repr__(self):
|
||||
return 'Tree(%r, %r)' % (self.data, self.children)
|
||||
|
||||
def _pretty_label(self):
|
||||
return self.data
|
||||
|
||||
def _pretty(self, level, indent_str):
|
||||
yield f'{indent_str*level}{self._pretty_label()}'
|
||||
if len(self.children) == 1 and not isinstance(self.children[0], Tree):
|
||||
yield f'\t{self.children[0]}\n'
|
||||
else:
|
||||
yield '\n'
|
||||
for n in self.children:
|
||||
if isinstance(n, Tree):
|
||||
yield from n._pretty(level+1, indent_str)
|
||||
else:
|
||||
yield f'{indent_str*(level+1)}{n}\n'
|
||||
|
||||
def pretty(self, indent_str: str=' ') -> str:
|
||||
"""Returns an indented string representation of the tree.
|
||||
|
||||
Great for debugging.
|
||||
"""
|
||||
return ''.join(self._pretty(0, indent_str))
|
||||
|
||||
def __rich__(self, parent:Optional['rich.tree.Tree']=None) -> 'rich.tree.Tree':
|
||||
"""Returns a tree widget for the 'rich' library.
|
||||
|
||||
Example:
|
||||
::
|
||||
from rich import print
|
||||
from lark import Tree
|
||||
|
||||
tree = Tree('root', ['node1', 'node2'])
|
||||
print(tree)
|
||||
"""
|
||||
return self._rich(parent)
|
||||
|
||||
def _rich(self, parent):
|
||||
if parent:
|
||||
tree = parent.add(f'[bold]{self.data}[/bold]')
|
||||
else:
|
||||
import rich.tree
|
||||
tree = rich.tree.Tree(self.data)
|
||||
|
||||
for c in self.children:
|
||||
if isinstance(c, Tree):
|
||||
c._rich(tree)
|
||||
else:
|
||||
tree.add(f'[green]{c}[/green]')
|
||||
|
||||
return tree
|
||||
|
||||
def __eq__(self, other):
|
||||
try:
|
||||
return self.data == other.data and self.children == other.children
|
||||
except AttributeError:
|
||||
return False
|
||||
|
||||
def __ne__(self, other):
|
||||
return not (self == other)
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return hash((self.data, tuple(self.children)))
|
||||
|
||||
def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]':
|
||||
"""Depth-first iteration.
|
||||
|
||||
Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
|
||||
"""
|
||||
queue = [self]
|
||||
subtrees = dict()
|
||||
for subtree in queue:
|
||||
subtrees[id(subtree)] = subtree
|
||||
queue += [c for c in reversed(subtree.children)
|
||||
if isinstance(c, Tree) and id(c) not in subtrees]
|
||||
|
||||
del queue
|
||||
return reversed(list(subtrees.values()))
|
||||
|
||||
def iter_subtrees_topdown(self):
|
||||
"""Breadth-first iteration.
|
||||
|
||||
Iterates over all the subtrees, returning nodes in the same order as pretty() does.
|
||||
"""
|
||||
stack = [self]
|
||||
stack_append = stack.append
|
||||
stack_pop = stack.pop
|
||||
while stack:
|
||||
node = stack_pop()
|
||||
if not isinstance(node, Tree):
|
||||
continue
|
||||
yield node
|
||||
for child in reversed(node.children):
|
||||
stack_append(child)
|
||||
|
||||
def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]':
|
||||
"""Returns all nodes of the tree that evaluate pred(node) as true."""
|
||||
return filter(pred, self.iter_subtrees())
|
||||
|
||||
def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]':
|
||||
"""Returns all nodes of the tree whose data equals the given data."""
|
||||
return self.find_pred(lambda t: t.data == data)
|
||||
|
||||
###}
|
||||
|
||||
def expand_kids_by_data(self, *data_values):
|
||||
"""Expand (inline) children with any of the given data values. Returns True if anything changed"""
|
||||
changed = False
|
||||
for i in range(len(self.children)-1, -1, -1):
|
||||
child = self.children[i]
|
||||
if isinstance(child, Tree) and child.data in data_values:
|
||||
self.children[i:i+1] = child.children
|
||||
changed = True
|
||||
return changed
|
||||
|
||||
|
||||
def scan_values(self, pred: 'Callable[[Branch[_Leaf_T]], bool]') -> Iterator[_Leaf_T]:
|
||||
"""Return all values in the tree that evaluate pred(value) as true.
|
||||
|
||||
This can be used to find all the tokens in the tree.
|
||||
|
||||
Example:
|
||||
>>> all_tokens = tree.scan_values(lambda v: isinstance(v, Token))
|
||||
"""
|
||||
for c in self.children:
|
||||
if isinstance(c, Tree):
|
||||
for t in c.scan_values(pred):
|
||||
yield t
|
||||
else:
|
||||
if pred(c):
|
||||
yield c
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)
|
||||
|
||||
def copy(self) -> 'Tree[_Leaf_T]':
|
||||
return type(self)(self.data, self.children)
|
||||
|
||||
def set(self, data: str, children: 'List[Branch[_Leaf_T]]') -> None:
|
||||
self.data = data
|
||||
self.children = children
|
||||
|
||||
|
||||
ParseTree = Tree['Token']
|
||||
|
||||
|
||||
class SlottedTree(Tree):
|
||||
__slots__ = 'data', 'children', 'rule', '_meta'
|
||||
|
||||
|
||||
def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None:
|
||||
graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
|
||||
graph.write_png(filename)
|
||||
|
||||
|
||||
def pydot__tree_to_dot(tree: Tree, filename, rankdir="LR", **kwargs):
|
||||
graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
|
||||
graph.write(filename)
|
||||
|
||||
|
||||
def pydot__tree_to_graph(tree: Tree, rankdir="LR", **kwargs):
|
||||
"""Creates a colorful image that represents the tree (data+children, without meta)
|
||||
|
||||
Possible values for `rankdir` are "TB", "LR", "BT", "RL", corresponding to
|
||||
directed graphs drawn from top to bottom, from left to right, from bottom to
|
||||
top, and from right to left, respectively.
|
||||
|
||||
`kwargs` can be any graph attribute (e. g. `dpi=200`). For a list of
|
||||
possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
|
||||
"""
|
||||
|
||||
import pydot # type: ignore[import-not-found]
|
||||
graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
|
||||
|
||||
i = [0]
|
||||
|
||||
def new_leaf(leaf):
|
||||
node = pydot.Node(i[0], label=repr(leaf))
|
||||
i[0] += 1
|
||||
graph.add_node(node)
|
||||
return node
|
||||
|
||||
def _to_pydot(subtree):
|
||||
color = hash(subtree.data) & 0xffffff
|
||||
color |= 0x808080
|
||||
|
||||
subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
|
||||
for child in subtree.children]
|
||||
node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
|
||||
i[0] += 1
|
||||
graph.add_node(node)
|
||||
|
||||
for subnode in subnodes:
|
||||
graph.add_edge(pydot.Edge(node, subnode))
|
||||
|
||||
return node
|
||||
|
||||
_to_pydot(tree)
|
||||
return graph
|
||||
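A short usage sketch for the Tree helpers defined above (the tree and token values are illustrative):

    from lark import Token, Tree

    t = Tree('start', [Tree('item', [Token('NAME', 'a')]), Tree('item', [Token('NAME', 'b')])])
    print(t.pretty())                                             # indented textual form
    tokens = list(t.scan_values(lambda v: isinstance(v, Token)))  # all Token leaves
    items = list(t.find_data('item'))                             # every subtree whose data == 'item'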
186
ccxt/static_dependencies/lark/tree_matcher.py
Normal file
186
ccxt/static_dependencies/lark/tree_matcher.py
Normal file
@@ -0,0 +1,186 @@
|
||||
"""Tree matcher based on Lark grammar"""
|
||||
|
||||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
from . import Tree, Token
|
||||
from .common import ParserConf
|
||||
from .parsers import earley
|
||||
from .grammar import Rule, Terminal, NonTerminal
|
||||
|
||||
|
||||
def is_discarded_terminal(t):
|
||||
return t.is_term and t.filter_out
|
||||
|
||||
|
||||
class _MakeTreeMatch:
|
||||
def __init__(self, name, expansion):
|
||||
self.name = name
|
||||
self.expansion = expansion
|
||||
|
||||
def __call__(self, args):
|
||||
t = Tree(self.name, args)
|
||||
t.meta.match_tree = True
|
||||
t.meta.orig_expansion = self.expansion
|
||||
return t
|
||||
|
||||
|
||||
def _best_from_group(seq, group_key, cmp_key):
|
||||
d = {}
|
||||
for item in seq:
|
||||
key = group_key(item)
|
||||
if key in d:
|
||||
v1 = cmp_key(item)
|
||||
v2 = cmp_key(d[key])
|
||||
if v2 > v1:
|
||||
d[key] = item
|
||||
else:
|
||||
d[key] = item
|
||||
return list(d.values())
|
||||
|
||||
|
||||
def _best_rules_from_group(rules):
|
||||
rules = _best_from_group(rules, lambda r: r, lambda r: -len(r.expansion))
|
||||
rules.sort(key=lambda r: len(r.expansion))
|
||||
return rules
|
||||
|
||||
|
||||
def _match(term, token):
|
||||
if isinstance(token, Tree):
|
||||
name, _args = parse_rulename(term.name)
|
||||
return token.data == name
|
||||
elif isinstance(token, Token):
|
||||
return term == Terminal(token.type)
|
||||
assert False, (term, token)
|
||||
|
||||
|
||||
def make_recons_rule(origin, expansion, old_expansion):
|
||||
return Rule(origin, expansion, alias=_MakeTreeMatch(origin.name, old_expansion))
|
||||
|
||||
|
||||
def make_recons_rule_to_term(origin, term):
|
||||
return make_recons_rule(origin, [Terminal(term.name)], [term])
|
||||
|
||||
|
||||
def parse_rulename(s):
|
||||
"Parse rule names that may contain a template syntax (like rule{a, b, ...})"
|
||||
name, args_str = re.match(r'(\w+)(?:{(.+)})?', s).groups()
|
||||
args = args_str and [a.strip() for a in args_str.split(',')]
|
||||
return name, args
|
||||
|
||||
|
||||
|
||||
class ChildrenLexer:
|
||||
def __init__(self, children):
|
||||
self.children = children
|
||||
|
||||
def lex(self, parser_state):
|
||||
return self.children
|
||||
|
||||
class TreeMatcher:
|
||||
"""Match the elements of a tree node, based on an ontology
|
||||
provided by a Lark grammar.
|
||||
|
||||
Supports templates and inlined rules (`rule{a, b, ...}` and `_rule`).
|
||||
|
||||
Initialize with an instance of Lark.
|
||||
"""
|
||||
|
||||
def __init__(self, parser):
|
||||
# XXX TODO calling compile twice returns different results!
|
||||
assert not parser.options.maybe_placeholders
|
||||
# XXX TODO: we just ignore the potential existence of a postlexer
|
||||
self.tokens, rules, _extra = parser.grammar.compile(parser.options.start, set())
|
||||
|
||||
self.rules_for_root = defaultdict(list)
|
||||
|
||||
self.rules = list(self._build_recons_rules(rules))
|
||||
self.rules.reverse()
|
||||
|
||||
# Choose the best rule from each group of {rule => [rule.alias]}, since we only really need one derivation.
|
||||
self.rules = _best_rules_from_group(self.rules)
|
||||
|
||||
self.parser = parser
|
||||
self._parser_cache = {}
|
||||
|
||||
def _build_recons_rules(self, rules):
|
||||
"Convert tree-parsing/construction rules to tree-matching rules"
|
||||
expand1s = {r.origin for r in rules if r.options.expand1}
|
||||
|
||||
aliases = defaultdict(list)
|
||||
for r in rules:
|
||||
if r.alias:
|
||||
aliases[r.origin].append(r.alias)
|
||||
|
||||
rule_names = {r.origin for r in rules}
|
||||
nonterminals = {sym for sym in rule_names
|
||||
if sym.name.startswith('_') or sym in expand1s or sym in aliases}
|
||||
|
||||
seen = set()
|
||||
for r in rules:
|
||||
recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
|
||||
for sym in r.expansion if not is_discarded_terminal(sym)]
|
||||
|
||||
# Skip self-recursive constructs
|
||||
if recons_exp == [r.origin] and r.alias is None:
|
||||
continue
|
||||
|
||||
sym = NonTerminal(r.alias) if r.alias else r.origin
|
||||
rule = make_recons_rule(sym, recons_exp, r.expansion)
|
||||
|
||||
if sym in expand1s and len(recons_exp) != 1:
|
||||
self.rules_for_root[sym.name].append(rule)
|
||||
|
||||
if sym.name not in seen:
|
||||
yield make_recons_rule_to_term(sym, sym)
|
||||
seen.add(sym.name)
|
||||
else:
|
||||
if sym.name.startswith('_') or sym in expand1s:
|
||||
yield rule
|
||||
else:
|
||||
self.rules_for_root[sym.name].append(rule)
|
||||
|
||||
for origin, rule_aliases in aliases.items():
|
||||
for alias in rule_aliases:
|
||||
yield make_recons_rule_to_term(origin, NonTerminal(alias))
|
||||
yield make_recons_rule_to_term(origin, origin)
|
||||
|
||||
def match_tree(self, tree, rulename):
|
||||
"""Match the elements of `tree` to the symbols of rule `rulename`.
|
||||
|
||||
Parameters:
|
||||
tree (Tree): the tree node to match
|
||||
rulename (str): The expected full rule name (including template args)
|
||||
|
||||
Returns:
|
||||
Tree: an unreduced tree that matches `rulename`
|
||||
|
||||
Raises:
|
||||
UnexpectedToken: If no match was found.
|
||||
|
||||
Note:
|
||||
It is the caller's responsibility to match the tree recursively.
|
||||
"""
|
||||
if rulename:
|
||||
# validate
|
||||
name, _args = parse_rulename(rulename)
|
||||
assert tree.data == name
|
||||
else:
|
||||
rulename = tree.data
|
||||
|
||||
# TODO: ambiguity?
|
||||
try:
|
||||
parser = self._parser_cache[rulename]
|
||||
except KeyError:
|
||||
rules = self.rules + _best_rules_from_group(self.rules_for_root[rulename])
|
||||
|
||||
# TODO pass callbacks through dict, instead of alias?
|
||||
callbacks = {rule: rule.alias for rule in rules}
|
||||
conf = ParserConf(rules, callbacks, [rulename])
|
||||
parser = earley.Parser(self.parser.lexer_conf, conf, _match, resolve_ambiguity=True)
|
||||
self._parser_cache[rulename] = parser
|
||||
|
||||
# find a full derivation
|
||||
unreduced_tree = parser.parse(ChildrenLexer(tree.children), rulename)
|
||||
assert unreduced_tree.data == rulename
|
||||
return unreduced_tree
|
||||
180
ccxt/static_dependencies/lark/tree_templates.py
Normal file
180
ccxt/static_dependencies/lark/tree_templates.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""This module defines utilities for matching and translation tree templates.
|
||||
|
||||
A tree template is a tree that contains nodes that are template variables.
|
||||
|
||||
"""
|
||||
|
||||
from typing import Union, Optional, Mapping, Dict, Tuple, Iterator
|
||||
|
||||
from lark import Tree, Transformer
|
||||
from lark.exceptions import MissingVariableError
|
||||
|
||||
Branch = Union[Tree[str], str]
|
||||
TreeOrCode = Union[Tree[str], str]
|
||||
MatchResult = Dict[str, Tree]
|
||||
_TEMPLATE_MARKER = '$'
|
||||
|
||||
|
||||
class TemplateConf:
|
||||
"""Template Configuration
|
||||
|
||||
Allows customization for different uses of Template
|
||||
|
||||
parse() must return a Tree instance.
|
||||
"""
|
||||
|
||||
def __init__(self, parse=None):
|
||||
self._parse = parse
|
||||
|
||||
def test_var(self, var: Union[Tree[str], str]) -> Optional[str]:
|
||||
"""Given a tree node, if it is a template variable return its name. Otherwise, return None.
|
||||
|
||||
This method may be overridden for customization
|
||||
|
||||
Parameters:
|
||||
var: Tree | str - The tree node to test
|
||||
|
||||
"""
|
||||
if isinstance(var, str):
|
||||
return _get_template_name(var)
|
||||
|
||||
if (
|
||||
isinstance(var, Tree)
|
||||
and var.data == "var"
|
||||
and len(var.children) > 0
|
||||
and isinstance(var.children[0], str)
|
||||
):
|
||||
return _get_template_name(var.children[0])
|
||||
|
||||
return None
|
||||
|
||||
def _get_tree(self, template: TreeOrCode) -> Tree[str]:
|
||||
if isinstance(template, str):
|
||||
assert self._parse
|
||||
template = self._parse(template)
|
||||
|
||||
if not isinstance(template, Tree):
|
||||
raise TypeError("template parser must return a Tree instance")
|
||||
|
||||
return template
|
||||
|
||||
def __call__(self, template: Tree[str]) -> 'Template':
|
||||
return Template(template, conf=self)
|
||||
|
||||
def _match_tree_template(self, template: TreeOrCode, tree: Branch) -> Optional[MatchResult]:
|
||||
"""Returns dict of {var: match} if found a match, else None
|
||||
"""
|
||||
template_var = self.test_var(template)
|
||||
if template_var:
|
||||
if not isinstance(tree, Tree):
|
||||
raise TypeError(f"Template variables can only match Tree instances. Not {tree!r}")
|
||||
return {template_var: tree}
|
||||
|
||||
if isinstance(template, str):
|
||||
if template == tree:
|
||||
return {}
|
||||
return None
|
||||
|
||||
assert isinstance(template, Tree) and isinstance(tree, Tree), f"template={template} tree={tree}"
|
||||
|
||||
if template.data == tree.data and len(template.children) == len(tree.children):
|
||||
res = {}
|
||||
for t1, t2 in zip(template.children, tree.children):
|
||||
matches = self._match_tree_template(t1, t2)
|
||||
if matches is None:
|
||||
return None
|
||||
|
||||
res.update(matches)
|
||||
|
||||
return res
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class _ReplaceVars(Transformer[str, Tree[str]]):
|
||||
def __init__(self, conf: TemplateConf, vars: Mapping[str, Tree[str]]) -> None:
|
||||
super().__init__()
|
||||
self._conf = conf
|
||||
self._vars = vars
|
||||
|
||||
def __default__(self, data, children, meta) -> Tree[str]:
|
||||
tree = super().__default__(data, children, meta)
|
||||
|
||||
var = self._conf.test_var(tree)
|
||||
if var:
|
||||
try:
|
||||
return self._vars[var]
|
||||
except KeyError:
|
||||
raise MissingVariableError(f"No mapping for template variable ({var})")
|
||||
return tree
|
||||
|
||||
|
||||
class Template:
|
||||
"""Represents a tree template, tied to a specific configuration
|
||||
|
||||
A tree template is a tree that contains nodes that are template variables.
|
||||
Those variables will match any tree.
|
||||
(future versions may support annotations on the variables, to allow more complex templates)
|
||||
"""
|
||||
|
||||
def __init__(self, tree: Tree[str], conf: TemplateConf = TemplateConf()):
|
||||
self.conf = conf
|
||||
self.tree = conf._get_tree(tree)
|
||||
|
||||
def match(self, tree: TreeOrCode) -> Optional[MatchResult]:
|
||||
"""Match a tree template to a tree.
|
||||
|
||||
A tree template without variables will only match ``tree`` if it is equal to the template.
|
||||
|
||||
Parameters:
|
||||
tree (Tree): The tree to match to the template
|
||||
|
||||
Returns:
|
||||
Optional[Dict[str, Tree]]: If match is found, returns a dictionary mapping
|
||||
template variable names to their matching tree nodes.
|
||||
If no match was found, returns None.
|
||||
"""
|
||||
tree = self.conf._get_tree(tree)
|
||||
return self.conf._match_tree_template(self.tree, tree)
|
||||
|
||||
def search(self, tree: TreeOrCode) -> Iterator[Tuple[Tree[str], MatchResult]]:
|
||||
"""Search for all occurrences of the tree template inside ``tree``.
|
||||
"""
|
||||
tree = self.conf._get_tree(tree)
|
||||
for subtree in tree.iter_subtrees():
|
||||
res = self.match(subtree)
|
||||
if res:
|
||||
yield subtree, res
|
||||
|
||||
def apply_vars(self, vars: Mapping[str, Tree[str]]) -> Tree[str]:
|
||||
"""Apply vars to the template tree
|
||||
"""
|
||||
return _ReplaceVars(self.conf, vars).transform(self.tree)
|
||||
|
||||
|
||||
def translate(t1: Template, t2: Template, tree: TreeOrCode):
|
||||
"""Search tree and translate each occurrence of t1 into t2.
|
||||
"""
|
||||
tree = t1.conf._get_tree(tree) # ensure it's a tree, parse if necessary and possible
|
||||
for subtree, vars in t1.search(tree):
|
||||
res = t2.apply_vars(vars)
|
||||
subtree.set(res.data, res.children)
|
||||
return tree
|
||||
|
||||
|
||||
class TemplateTranslator:
|
||||
"""Utility class for translating a collection of patterns
|
||||
"""
|
||||
|
||||
def __init__(self, translations: Mapping[Template, Template]):
|
||||
assert all(isinstance(k, Template) and isinstance(v, Template) for k, v in translations.items())
|
||||
self.translations = translations
|
||||
|
||||
def translate(self, tree: Tree[str]):
|
||||
for k, v in self.translations.items():
|
||||
tree = translate(k, v, tree)
|
||||
return tree
|
||||
|
||||
|
||||
def _get_template_name(value: str) -> Optional[str]:
|
||||
return value.lstrip(_TEMPLATE_MARKER) if value.startswith(_TEMPLATE_MARKER) else None
|
||||
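A minimal sketch of the template API above, using hand-built trees (purely illustrative; Tree is already imported at the top of this module, and the '$' prefix marks a template variable, as recognized by TemplateConf.test_var):

    conf = TemplateConf()
    pattern = conf(Tree('call', ['f', '$arg']))
    match = pattern.match(Tree('call', ['f', Tree('number', ['42'])]))
    # match == {'arg': Tree('number', ['42'])}
    pattern.match(Tree('call', ['g', Tree('number', ['42'])]))    # -> None (leaf 'g' != 'f')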
343
ccxt/static_dependencies/lark/utils.py
Normal file
343
ccxt/static_dependencies/lark/utils.py
Normal file
@@ -0,0 +1,343 @@
|
||||
import unicodedata
|
||||
import os
|
||||
from itertools import product
|
||||
from collections import deque
|
||||
from typing import Callable, Iterator, List, Optional, Tuple, Type, TypeVar, Union, Dict, Any, Sequence, Iterable, AbstractSet
|
||||
|
||||
###{standalone
|
||||
import sys, re
|
||||
import logging
|
||||
|
||||
logger: logging.Logger = logging.getLogger("lark")
|
||||
logger.addHandler(logging.StreamHandler())
|
||||
# Set to highest level, since we have some warnings amongst the code
|
||||
# By default, we should not output any log messages
|
||||
logger.setLevel(logging.CRITICAL)
|
||||
|
||||
|
||||
NO_VALUE = object()
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def classify(seq: Iterable, key: Optional[Callable] = None, value: Optional[Callable] = None) -> Dict:
|
||||
d: Dict[Any, Any] = {}
|
||||
for item in seq:
|
||||
k = key(item) if (key is not None) else item
|
||||
v = value(item) if (value is not None) else item
|
||||
try:
|
||||
d[k].append(v)
|
||||
except KeyError:
|
||||
d[k] = [v]
|
||||
return d
|
||||
|
||||
|
||||
def _deserialize(data: Any, namespace: Dict[str, Any], memo: Dict) -> Any:
|
||||
if isinstance(data, dict):
|
||||
if '__type__' in data: # Object
|
||||
class_ = namespace[data['__type__']]
|
||||
return class_.deserialize(data, memo)
|
||||
elif '@' in data:
|
||||
return memo[data['@']]
|
||||
return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
|
||||
elif isinstance(data, list):
|
||||
return [_deserialize(value, namespace, memo) for value in data]
|
||||
return data
|
||||
|
||||
|
||||
_T = TypeVar("_T", bound="Serialize")
|
||||
|
||||
class Serialize:
|
||||
"""Safe-ish serialization interface that doesn't rely on Pickle
|
||||
|
||||
Attributes:
|
||||
__serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
|
||||
__serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
|
||||
Should include all field types that aren't builtin types.
|
||||
"""
|
||||
|
||||
def memo_serialize(self, types_to_memoize: List) -> Any:
|
||||
memo = SerializeMemoizer(types_to_memoize)
|
||||
return self.serialize(memo), memo.serialize()
|
||||
|
||||
def serialize(self, memo = None) -> Dict[str, Any]:
|
||||
if memo and memo.in_types(self):
|
||||
return {'@': memo.memoized.get(self)}
|
||||
|
||||
fields = getattr(self, '__serialize_fields__')
|
||||
res = {f: _serialize(getattr(self, f), memo) for f in fields}
|
||||
res['__type__'] = type(self).__name__
|
||||
if hasattr(self, '_serialize'):
|
||||
self._serialize(res, memo)
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T:
|
||||
namespace = getattr(cls, '__serialize_namespace__', [])
|
||||
namespace = {c.__name__:c for c in namespace}
|
||||
|
||||
fields = getattr(cls, '__serialize_fields__')
|
||||
|
||||
if '@' in data:
|
||||
return memo[data['@']]
|
||||
|
||||
inst = cls.__new__(cls)
|
||||
for f in fields:
|
||||
try:
|
||||
setattr(inst, f, _deserialize(data[f], namespace, memo))
|
||||
except KeyError as e:
|
||||
raise KeyError("Cannot find key for class", cls, e)
|
||||
|
||||
if hasattr(inst, '_deserialize'):
|
||||
inst._deserialize()
|
||||
|
||||
return inst
|
||||
|
||||
|
||||
class SerializeMemoizer(Serialize):
|
||||
"A version of serialize that memoizes objects to reduce space"
|
||||
|
||||
__serialize_fields__ = 'memoized',
|
||||
|
||||
def __init__(self, types_to_memoize: List) -> None:
|
||||
self.types_to_memoize = tuple(types_to_memoize)
|
||||
self.memoized = Enumerator()
|
||||
|
||||
def in_types(self, value: Serialize) -> bool:
|
||||
return isinstance(value, self.types_to_memoize)
|
||||
|
||||
def serialize(self) -> Dict[int, Any]: # type: ignore[override]
|
||||
return _serialize(self.memoized.reversed(), None)
|
||||
|
||||
@classmethod
|
||||
def deserialize(cls, data: Dict[int, Any], namespace: Dict[str, Any], memo: Dict[Any, Any]) -> Dict[int, Any]: # type: ignore[override]
|
||||
return _deserialize(data, namespace, memo)
|
||||
|
||||
|
||||
try:
|
||||
import regex
|
||||
_has_regex = True
|
||||
except ImportError:
|
||||
_has_regex = False
|
||||
|
||||
if sys.version_info >= (3, 11):
|
||||
import re._parser as sre_parse
|
||||
import re._constants as sre_constants
|
||||
else:
|
||||
import sre_parse
|
||||
import sre_constants
|
||||
|
||||
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
|
||||
|
||||
def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]:
|
||||
if _has_regex:
|
||||
# Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
|
||||
# a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
|
||||
# match here below.
|
||||
regexp_final = re.sub(categ_pattern, 'A', expr)
|
||||
else:
|
||||
if re.search(categ_pattern, expr):
|
||||
raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
|
||||
regexp_final = expr
|
||||
try:
|
||||
# Fixed in next version (past 0.960) of typeshed
|
||||
return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
|
||||
except sre_constants.error:
|
||||
if not _has_regex:
|
||||
raise ValueError(expr)
|
||||
else:
|
||||
# sre_parse does not support the new features in regex. To not completely fail in that case,
|
||||
# we manually test for the most important info (whether the empty string is matched)
|
||||
c = regex.compile(regexp_final)
|
||||
# Python 3.11.7 introduced sre_parse.MAXWIDTH, which is used instead of MAXREPEAT
|
||||
# See lark-parser/lark#1376 and python/cpython#109859
|
||||
MAXWIDTH = getattr(sre_parse, "MAXWIDTH", sre_constants.MAXREPEAT)
|
||||
if c.match('') is None:
|
||||
# MAXREPEAT is a non-picklable subclass of int, so it needs to be converted to enable caching
|
||||
return 1, int(MAXWIDTH)
|
||||
else:
|
||||
return 0, int(MAXWIDTH)
|
||||
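# For instance (illustrative): get_regexp_width(r'a{2,5}') evaluates to [2, 5],
# i.e. the minimum and maximum number of characters the pattern can match.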
|
||||
###}
|
||||
|
||||
|
||||
_ID_START = 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'
|
||||
_ID_CONTINUE = _ID_START + ('Nd', 'Nl',)
|
||||
|
||||
def _test_unicode_category(s: str, categories: Sequence[str]) -> bool:
|
||||
if len(s) != 1:
|
||||
return all(_test_unicode_category(char, categories) for char in s)
|
||||
return s == '_' or unicodedata.category(s) in categories
|
||||
|
||||
def is_id_continue(s: str) -> bool:
|
||||
"""
|
||||
Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, Indian vowels, non-Latin
|
||||
numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.
|
||||
"""
|
||||
return _test_unicode_category(s, _ID_CONTINUE)
|
||||
|
||||
def is_id_start(s: str) -> bool:
|
||||
"""
|
||||
Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, Indian vowels, non-Latin
|
||||
numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
|
||||
"""
|
||||
return _test_unicode_category(s, _ID_START)
|
||||
|
||||
|
||||
def dedup_list(l: Sequence[T]) -> List[T]:
|
||||
Given a list (l), removes duplicates from it,
|
||||
preserving the original order of the list. Assumes that
|
||||
the list entries are hashable."""
|
||||
return list(dict.fromkeys(l))
|
||||
|
||||
|
||||
class Enumerator(Serialize):
|
||||
def __init__(self) -> None:
|
||||
self.enums: Dict[Any, int] = {}
|
||||
|
||||
def get(self, item) -> int:
|
||||
if item not in self.enums:
|
||||
self.enums[item] = len(self.enums)
|
||||
return self.enums[item]
|
||||
|
||||
def __len__(self):
|
||||
return len(self.enums)
|
||||
|
||||
def reversed(self) -> Dict[int, Any]:
|
||||
r = {v: k for k, v in self.enums.items()}
|
||||
assert len(r) == len(self.enums)
|
||||
return r
|
||||
|
||||
|
||||
|
||||
def combine_alternatives(lists):
|
||||
"""
|
||||
Accepts a list of alternatives, and enumerates all their possible concatenations.
|
||||
|
||||
Examples:
|
||||
>>> combine_alternatives([range(2), [4,5]])
|
||||
[[0, 4], [0, 5], [1, 4], [1, 5]]
|
||||
|
||||
>>> combine_alternatives(["abc", "xy", '$'])
|
||||
[['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
|
||||
|
||||
>>> combine_alternatives([])
|
||||
[[]]
|
||||
"""
|
||||
if not lists:
|
||||
return [[]]
|
||||
assert all(l for l in lists), lists
|
||||
return list(product(*lists))
|
||||
|
||||
try:
|
||||
import atomicwrites
|
||||
_has_atomicwrites = True
|
||||
except ImportError:
|
||||
_has_atomicwrites = False
|
||||
|
||||
class FS:
|
||||
exists = staticmethod(os.path.exists)
|
||||
|
||||
@staticmethod
|
||||
def open(name, mode="r", **kwargs):
|
||||
if _has_atomicwrites and "w" in mode:
|
||||
return atomicwrites.atomic_write(name, mode=mode, overwrite=True, **kwargs)
|
||||
else:
|
||||
return open(name, mode, **kwargs)
|
||||
|
||||
|
||||
class fzset(frozenset):
|
||||
def __repr__(self):
|
||||
return '{%s}' % ', '.join(map(repr, self))
|
||||
|
||||
|
||||
def classify_bool(seq: Iterable, pred: Callable) -> Any:
|
||||
false_elems = []
|
||||
true_elems = [elem for elem in seq if pred(elem) or false_elems.append(elem)] # type: ignore[func-returns-value]
|
||||
return true_elems, false_elems
|
||||
|
||||
|
||||
def bfs(initial: Iterable, expand: Callable) -> Iterator:
|
||||
open_q = deque(list(initial))
|
||||
visited = set(open_q)
|
||||
while open_q:
|
||||
node = open_q.popleft()
|
||||
yield node
|
||||
for next_node in expand(node):
|
||||
if next_node not in visited:
|
||||
visited.add(next_node)
|
||||
open_q.append(next_node)
|
||||
|
||||
def bfs_all_unique(initial, expand):
|
||||
"bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions"
|
||||
open_q = deque(list(initial))
|
||||
while open_q:
|
||||
node = open_q.popleft()
|
||||
yield node
|
||||
open_q += expand(node)
|
||||
|
||||
|
||||
def _serialize(value: Any, memo: Optional[SerializeMemoizer]) -> Any:
|
||||
if isinstance(value, Serialize):
|
||||
return value.serialize(memo)
|
||||
elif isinstance(value, list):
|
||||
return [_serialize(elem, memo) for elem in value]
|
||||
elif isinstance(value, frozenset):
|
||||
return list(value) # TODO reversible?
|
||||
elif isinstance(value, dict):
|
||||
return {key:_serialize(elem, memo) for key, elem in value.items()}
|
||||
# assert value is None or isinstance(value, (int, float, str, tuple)), value
|
||||
return value
|
||||
|
||||
|
||||
|
||||
|
||||
def small_factors(n: int, max_factor: int) -> List[Tuple[int, int]]:
|
||||
"""
|
||||
Splits n up into smaller factors and summands <= max_factor.
|
||||
Returns a list of [(a, b), ...]
|
||||
so that the following code returns n:
|
||||
|
||||
n = 1
|
||||
for a, b in values:
|
||||
n = n * a + b
|
||||
|
||||
Currently, we also keep a + b <= max_factor, but that might change
|
||||
"""
|
||||
assert n >= 0
|
||||
assert max_factor > 2
|
||||
if n <= max_factor:
|
||||
return [(n, 0)]
|
||||
|
||||
for a in range(max_factor, 1, -1):
|
||||
r, b = divmod(n, a)
|
||||
if a + b <= max_factor:
|
||||
return small_factors(r, max_factor) + [(a, b)]
|
||||
assert False, "Failed to factorize %s" % n
|
||||
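# Worked example (illustrative): small_factors(20, 6) returns [(4, 0), (5, 0)];
# starting from n = 1, the loop computes 1*4 + 0 = 4 and then 4*5 + 0 = 20.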
|
||||
|
||||
class OrderedSet(AbstractSet[T]):
|
||||
"""A minimal OrderedSet implementation, using a dictionary.
|
||||
|
||||
(relies on the dictionary being ordered)
|
||||
"""
|
||||
def __init__(self, items: Iterable[T] =()):
|
||||
self.d = dict.fromkeys(items)
|
||||
|
||||
def __contains__(self, item: Any) -> bool:
|
||||
return item in self.d
|
||||
|
||||
def add(self, item: T):
|
||||
self.d[item] = None
|
||||
|
||||
def __iter__(self) -> Iterator[T]:
|
||||
return iter(self.d)
|
||||
|
||||
def remove(self, item: T):
|
||||
del self.d[item]
|
||||
|
||||
def __bool__(self):
|
||||
return bool(self.d)
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.d)
|
||||
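Two quick usage sketches for helpers defined above (values are illustrative):

    pairs = [('a', 1), ('b', 2), ('a', 3)]
    classify(pairs, key=lambda p: p[0], value=lambda p: p[1])   # {'a': [1, 3], 'b': [2]}

    s = OrderedSet('abca')
    list(s)   # ['a', 'b', 'c'] -- insertion order kept, duplicates dropped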
596
ccxt/static_dependencies/lark/visitors.py
Normal file
596
ccxt/static_dependencies/lark/visitors.py
Normal file
@@ -0,0 +1,596 @@
|
||||
from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional, Any, cast
|
||||
from abc import ABC
|
||||
|
||||
from .utils import combine_alternatives
|
||||
from .tree import Tree, Branch
|
||||
from .exceptions import VisitError, GrammarError
|
||||
from .lexer import Token
|
||||
|
||||
###{standalone
|
||||
from functools import wraps, update_wrapper
|
||||
from inspect import getmembers, getmro
|
||||
|
||||
_Return_T = TypeVar('_Return_T')
|
||||
_Return_V = TypeVar('_Return_V')
|
||||
_Leaf_T = TypeVar('_Leaf_T')
|
||||
_Leaf_U = TypeVar('_Leaf_U')
|
||||
_R = TypeVar('_R')
|
||||
_FUNC = Callable[..., _Return_T]
|
||||
_DECORATED = Union[_FUNC, type]
|
||||
|
||||
class _DiscardType:
|
||||
"""When the Discard value is returned from a transformer callback,
|
||||
that node is discarded and won't appear in the parent.
|
||||
|
||||
Note:
|
||||
This feature is disabled when the transformer is provided to Lark
|
||||
using the ``transformer`` keyword (aka Tree-less LALR mode).
|
||||
|
||||
Example:
|
||||
::
|
||||
|
||||
class T(Transformer):
|
||||
def ignore_tree(self, children):
|
||||
return Discard
|
||||
|
||||
def IGNORE_TOKEN(self, token):
|
||||
return Discard
|
||||
"""
|
||||
|
||||
def __repr__(self):
|
||||
return "lark.visitors.Discard"
|
||||
|
||||
Discard = _DiscardType()
|
||||
|
||||
# Transformers
|
||||
|
||||
class _Decoratable:
|
||||
"Provides support for decorating methods with @v_args"
|
||||
|
||||
@classmethod
|
||||
def _apply_v_args(cls, visit_wrapper):
|
||||
mro = getmro(cls)
|
||||
assert mro[0] is cls
|
||||
libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
|
||||
for name, value in getmembers(cls):
|
||||
|
||||
# Make sure the function isn't inherited (unless it's overwritten)
|
||||
if name.startswith('_') or (name in libmembers and name not in cls.__dict__):
|
||||
continue
|
||||
if not callable(value):
|
||||
continue
|
||||
|
||||
# Skip if v_args already applied (at the function level)
|
||||
if isinstance(cls.__dict__[name], _VArgsWrapper):
|
||||
continue
|
||||
|
||||
setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper))
|
||||
return cls
|
||||
|
||||
def __class_getitem__(cls, _):
|
||||
return cls
|
||||
|
||||
|
||||
class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
|
||||
"""Transformers work bottom-up (or depth-first), starting with visiting the leaves and working
|
||||
their way up until ending at the root of the tree.
|
||||
|
||||
For each node visited, the transformer will call the appropriate method (callbacks), according to the
|
||||
node's ``data``, and use the returned value to replace the node, thereby creating a new tree structure.
|
||||
|
||||
Transformers can be used to implement map & reduce patterns. Because nodes are reduced from leaf to root,
|
||||
at any point the callbacks may assume the children have already been transformed (if applicable).
|
||||
|
||||
If the transformer cannot find a method with the right name, it will instead call ``__default__``, which by
|
||||
default creates a copy of the node.
|
||||
|
||||
To discard a node, return Discard (``lark.visitors.Discard``).
|
||||
|
||||
``Transformer`` can do anything ``Visitor`` can do, but because it reconstructs the tree,
|
||||
it is slightly less efficient.
|
||||
|
||||
A transformer without methods essentially performs a non-memoized partial deepcopy.
|
||||
|
||||
All these classes implement the transformer interface:
|
||||
|
||||
- ``Transformer`` - Recursively transforms the tree. This is the one you probably want.
|
||||
- ``Transformer_InPlace`` - Non-recursive. Changes the tree in-place instead of returning new instances
|
||||
- ``Transformer_InPlaceRecursive`` - Recursive. Changes the tree in-place instead of returning new instances
|
||||
|
||||
Parameters:
|
||||
visit_tokens (bool, optional): Should the transformer visit tokens in addition to rules.
|
||||
Setting this to ``False`` is slightly faster. Defaults to ``True``.
|
||||
(For processing ignored tokens, use the ``lexer_callbacks`` options)
|
||||
|
||||
"""
|
||||
__visit_tokens__ = True # For backwards compatibility
|
||||
|
||||
def __init__(self, visit_tokens: bool=True) -> None:
|
||||
self.__visit_tokens__ = visit_tokens
|
||||
|
||||
def _call_userfunc(self, tree, new_children=None):
|
||||
# Assumes tree is already transformed
|
||||
children = new_children if new_children is not None else tree.children
|
||||
try:
|
||||
f = getattr(self, tree.data)
|
||||
except AttributeError:
|
||||
return self.__default__(tree.data, children, tree.meta)
|
||||
else:
|
||||
try:
|
||||
wrapper = getattr(f, 'visit_wrapper', None)
|
||||
if wrapper is not None:
|
||||
return f.visit_wrapper(f, tree.data, children, tree.meta)
|
||||
else:
|
||||
return f(children)
|
||||
except GrammarError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise VisitError(tree.data, tree, e)
|
||||
|
||||
def _call_userfunc_token(self, token):
|
||||
try:
|
||||
f = getattr(self, token.type)
|
||||
except AttributeError:
|
||||
return self.__default_token__(token)
|
||||
else:
|
||||
try:
|
||||
return f(token)
|
||||
except GrammarError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise VisitError(token.type, token, e)
|
||||
|
||||
def _transform_children(self, children):
|
||||
for c in children:
|
||||
if isinstance(c, Tree):
|
||||
res = self._transform_tree(c)
|
||||
elif self.__visit_tokens__ and isinstance(c, Token):
|
||||
res = self._call_userfunc_token(c)
|
||||
else:
|
||||
res = c
|
||||
|
||||
if res is not Discard:
|
||||
yield res
|
||||
|
||||
def _transform_tree(self, tree):
|
||||
children = list(self._transform_children(tree.children))
|
||||
return self._call_userfunc(tree, children)
|
||||
|
||||
def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
|
||||
"Transform the given tree, and return the final result"
|
||||
res = list(self._transform_children([tree]))
|
||||
if not res:
|
||||
return None # type: ignore[return-value]
|
||||
assert len(res) == 1
|
||||
return res[0]
|
||||
|
||||
def __mul__(
|
||||
self: 'Transformer[_Leaf_T, Tree[_Leaf_U]]',
|
||||
other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]'
|
||||
) -> 'TransformerChain[_Leaf_T, _Return_V]':
|
||||
"""Chain two transformers together, returning a new transformer.
|
||||
"""
|
||||
return TransformerChain(self, other)
|
||||
|
||||
def __default__(self, data, children, meta):
|
||||
"""Default function that is called if there is no attribute matching ``data``
|
||||
|
||||
Can be overridden. Defaults to creating a new copy of the tree node (i.e. ``return Tree(data, children, meta)``)
|
||||
"""
|
||||
return Tree(data, children, meta)
|
||||
|
||||
def __default_token__(self, token):
|
||||
"""Default function that is called if there is no attribute matching ``token.type``
|
||||
|
||||
Can be overridden. Defaults to returning the token as-is.
|
||||
"""
|
||||
return token
|
||||
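# Illustrative sketch (not part of the original module): a Transformer that
# evaluates 'add' nodes over NUMBER tokens bottom-up, assuming a parser whose
# grammar produces such nodes:
#
#     class SumNumbers(Transformer):
#         def NUMBER(self, tok):
#             return int(tok)
#         def add(self, children):
#             return sum(children)
#
#     SumNumbers().transform(parser.parse('1+2+3'))   # -> 6  ('parser' is assumed)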
|
||||
|
||||
def merge_transformers(base_transformer=None, **transformers_to_merge):
    """Merge a collection of transformers into the base_transformer, each into its own 'namespace'.

    When called, it will collect the methods from each transformer, and assign them to base_transformer,
    with their name prefixed with the given keyword, as ``prefix__methodname``.

    This function is especially useful for processing grammars that import other grammars,
    thereby creating some of their rules in a 'namespace' (i.e. with a consistent name prefix).
    In this case, the key for the transformer should match the name of the imported grammar.

    Parameters:
        base_transformer (Transformer, optional): The transformer that all other transformers will be added to.
        **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``.

    Raises:
        AttributeError: In case of a name collision in the merged methods

    Example:
        ::

            class TBase(Transformer):
                def start(self, children):
                    return children[0] + 'bar'

            class TImportedGrammar(Transformer):
                def foo(self, children):
                    return "foo"

            composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())

            t = Tree('start', [ Tree('imported__foo', []) ])

            assert composed_transformer.transform(t) == 'foobar'

    """
    if base_transformer is None:
        base_transformer = Transformer()
    for prefix, transformer in transformers_to_merge.items():
        for method_name in dir(transformer):
            method = getattr(transformer, method_name)
            if not callable(method):
                continue
            if method_name.startswith("_") or method_name == "transform":
                continue
            prefixed_method = prefix + "__" + method_name
            if hasattr(base_transformer, prefixed_method):
                raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method)

            setattr(base_transformer, prefixed_method, method)

    return base_transformer


class InlineTransformer(Transformer):  # XXX Deprecated
    def _call_userfunc(self, tree, new_children=None):
        # Assumes tree is already transformed
        children = new_children if new_children is not None else tree.children
        try:
            f = getattr(self, tree.data)
        except AttributeError:
            return self.__default__(tree.data, children, tree.meta)
        else:
            return f(*children)


class TransformerChain(Generic[_Leaf_T, _Return_T]):

    transformers: 'Tuple[Union[Transformer, TransformerChain], ...]'

    def __init__(self, *transformers: 'Union[Transformer, TransformerChain]') -> None:
        self.transformers = transformers

    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
        for t in self.transformers:
            tree = t.transform(tree)
        return cast(_Return_T, tree)

    def __mul__(
            self: 'TransformerChain[_Leaf_T, Tree[_Leaf_U]]',
            other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]'
    ) -> 'TransformerChain[_Leaf_T, _Return_V]':
        return TransformerChain(*self.transformers + (other,))
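
# Illustrative sketch, not part of the upstream lark source: chaining two
# transformers with ``*`` builds a TransformerChain whose ``transform`` feeds each
# transformer's output into the next. The class names below are hypothetical.
def _example_transformer_chain():
    class _Double(Transformer):
        def num(self, children):
            return Tree('num', [c * 2 for c in children])

    class _Sum(Transformer):
        def num(self, children):
            return sum(children)

    chained = _Double() * _Sum()  # equivalent to TransformerChain(_Double(), _Sum())
    assert chained.transform(Tree('num', [1, 2, 3])) == 12

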
class Transformer_InPlace(Transformer[_Leaf_T, _Return_T]):
    """Same as Transformer, but non-recursive, and changes the tree in-place instead of returning new instances

    Useful for huge trees. Conservative in memory.
    """
    def _transform_tree(self, tree):  # Cancel recursion
        return self._call_userfunc(tree)

    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
        for subtree in tree.iter_subtrees():
            subtree.children = list(self._transform_children(subtree.children))

        return self._transform_tree(tree)


class Transformer_NonRecursive(Transformer[_Leaf_T, _Return_T]):
    """Same as Transformer but non-recursive.

    Like Transformer, it doesn't change the original tree.

    Useful for huge trees.
    """

    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
        # Tree to postfix
        rev_postfix = []
        q: List[Branch[_Leaf_T]] = [tree]
        while q:
            t = q.pop()
            rev_postfix.append(t)
            if isinstance(t, Tree):
                q += t.children

        # Postfix to tree
        stack: List = []
        for x in reversed(rev_postfix):
            if isinstance(x, Tree):
                size = len(x.children)
                if size:
                    args = stack[-size:]
                    del stack[-size:]
                else:
                    args = []

                res = self._call_userfunc(x, args)
                if res is not Discard:
                    stack.append(res)

            elif self.__visit_tokens__ and isinstance(x, Token):
                res = self._call_userfunc_token(x)
                if res is not Discard:
                    stack.append(res)
            else:
                stack.append(x)

        result, = stack  # We should have only one tree remaining
        # There are no guarantees on the type of the value produced by calling a user func
        # on a child, so the type system can't statically know that the final result is
        # _Return_T. As a result, a cast is required.
        return cast(_Return_T, result)
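
# Illustrative sketch, not part of the upstream lark source: because
# Transformer_NonRecursive linearizes the tree into a postfix list and rebuilds it
# with an explicit stack, it can process trees far deeper than Python's recursion
# limit. The helper below and its rule names are hypothetical.
def _example_nonrecursive_deep_tree(depth=10000):
    # Build a degenerate, very deep tree: wrap(wrap(... leaf ...))
    tree = Tree('leaf', [])
    for _ in range(depth):
        tree = Tree('wrap', [tree])

    class _CountDepth(Transformer_NonRecursive):
        def leaf(self, children):
            return 0

        def wrap(self, children):
            return children[0] + 1

    # Returns ``depth`` without hitting the recursion limit that a recursive
    # Transformer would run into on a tree this deep.
    return _CountDepth().transform(tree)

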
class Transformer_InPlaceRecursive(Transformer):
    "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances"
    def _transform_tree(self, tree):
        tree.children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree)


# Visitors

class VisitorBase:
    def _call_userfunc(self, tree):
        return getattr(self, tree.data, self.__default__)(tree)

    def __default__(self, tree):
        """Default function that is called if there is no attribute matching ``tree.data``

        Can be overridden. Defaults to doing nothing.
        """
        return tree

    def __class_getitem__(cls, _):
        return cls


class Visitor(VisitorBase, ABC, Generic[_Leaf_T]):
    """Tree visitor, non-recursive (can handle huge trees).

    Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
    """

    def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
        "Visits the tree, starting with the leaves and finally the root (bottom-up)"
        for subtree in tree.iter_subtrees():
            self._call_userfunc(subtree)
        return tree

    def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
        "Visit the tree, starting at the root, and ending at the leaves (top-down)"
        for subtree in tree.iter_subtrees_topdown():
            self._call_userfunc(subtree)
        return tree
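
# Illustrative sketch, not part of the upstream lark source: a Visitor works by
# side effect and leaves the tree unchanged. Here a hypothetical subclass tallies
# how often each rule name appears by overriding ``__default__``.
def _example_visitor_usage():
    from collections import Counter

    class _RuleCounter(Visitor):
        def __init__(self):
            self.counts = Counter()

        def __default__(self, tree):
            self.counts[tree.data] += 1

    v = _RuleCounter()
    v.visit(Tree('start', [Tree('item', []), Tree('item', [])]))
    return v.counts  # Counter({'item': 2, 'start': 1})

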
class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]):
    """Bottom-up visitor, recursive.

    Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``

    Slightly faster than the non-recursive version.
    """

    def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
        "Visits the tree, starting with the leaves and finally the root (bottom-up)"
        for child in tree.children:
            if isinstance(child, Tree):
                self.visit(child)

        self._call_userfunc(tree)
        return tree

    def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
        "Visit the tree, starting at the root, and ending at the leaves (top-down)"
        self._call_userfunc(tree)

        for child in tree.children:
            if isinstance(child, Tree):
                self.visit_topdown(child)

        return tree


class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
    """Interpreter walks the tree starting at the root.

    Visits the tree, starting with the root and finally the leaves (top-down)

    For each tree node, it calls its methods (provided by the user via inheritance) according to ``tree.data``.

    Unlike ``Transformer`` and ``Visitor``, the Interpreter doesn't automatically visit its sub-branches.
    The user has to explicitly call ``visit``, ``visit_children``, or use the ``@visit_children_decor``.
    This allows the user to implement branching and loops.
    """

    def visit(self, tree: Tree[_Leaf_T]) -> _Return_T:
        # There are no guarantees on the type of the value produced by calling a user func
        # on a child, so only annotate the public method and use an internal method when
        # visiting child trees.
        return self._visit_tree(tree)

    def _visit_tree(self, tree: Tree[_Leaf_T]):
        f = getattr(self, tree.data)
        wrapper = getattr(f, 'visit_wrapper', None)
        if wrapper is not None:
            return f.visit_wrapper(f, tree.data, tree.children, tree.meta)
        else:
            return f(tree)

    def visit_children(self, tree: Tree[_Leaf_T]) -> List:
        return [self._visit_tree(child) if isinstance(child, Tree) else child
                for child in tree.children]

    def __getattr__(self, name):
        return self.__default__

    def __default__(self, tree):
        return self.visit_children(tree)


_InterMethod = Callable[[Type[Interpreter], _Return_T], _R]

def visit_children_decor(func: _InterMethod) -> _InterMethod:
    "See Interpreter"
    @wraps(func)
    def inner(cls, tree):
        values = cls.visit_children(tree)
        return func(cls, values)
    return inner
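
# Illustrative sketch, not part of the upstream lark source: unlike Transformer and
# Visitor, an Interpreter only descends where the user asks it to, which makes
# conditional evaluation possible. The rule names below are hypothetical.
def _example_interpreter_usage():
    class _IfInterp(Interpreter):
        def if_stmt(self, tree):
            cond, then_branch, else_branch = tree.children
            # Only one branch is ever visited - the other subtree is skipped entirely.
            return self.visit(then_branch) if self.visit(cond) else self.visit(else_branch)

        def const(self, tree):
            return tree.children[0]

        @visit_children_decor
        def add(self, values):
            # visit_children_decor already visited the children; ``values`` holds their results.
            return sum(values)

    tree = Tree('if_stmt', [Tree('const', [True]),
                            Tree('add', [Tree('const', [1]), Tree('const', [2])]),
                            Tree('const', [99])])
    return _IfInterp().visit(tree)  # == 3

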
# Decorators

def _apply_v_args(obj, visit_wrapper):
    try:
        _apply = obj._apply_v_args
    except AttributeError:
        return _VArgsWrapper(obj, visit_wrapper)
    else:
        return _apply(visit_wrapper)


class _VArgsWrapper:
    """
    A wrapper around a Callable. It delegates ``__call__`` to the Callable.
    If the Callable has a ``__get__``, that is also delegated, and the resulting function is wrapped.
    Otherwise, we use the original function, mirroring the behaviour without a ``__get__``.
    We also have the ``visit_wrapper`` attribute to be used by Transformers.
    """
    base_func: Callable

    def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]):
        if isinstance(func, _VArgsWrapper):
            func = func.base_func
        self.base_func = func
        self.visit_wrapper = visit_wrapper
        update_wrapper(self, func)

    def __call__(self, *args, **kwargs):
        return self.base_func(*args, **kwargs)

    def __get__(self, instance, owner=None):
        try:
            # Use the __get__ attribute of the type instead of the instance
            # to fully mirror the behavior of getattr
            g = type(self.base_func).__get__
        except AttributeError:
            return self
        else:
            return _VArgsWrapper(g(self.base_func, instance, owner), self.visit_wrapper)

    def __set_name__(self, owner, name):
        try:
            f = type(self.base_func).__set_name__
        except AttributeError:
            return
        else:
            f(self.base_func, owner, name)


def _vargs_inline(f, _data, children, _meta):
    return f(*children)

def _vargs_meta_inline(f, _data, children, meta):
    return f(meta, *children)

def _vargs_meta(f, _data, children, meta):
    return f(meta, children)

def _vargs_tree(f, data, children, meta):
    return f(Tree(data, children, meta))


def v_args(inline: bool = False, meta: bool = False, tree: bool = False, wrapper: Optional[Callable] = None) -> Callable[[_DECORATED], _DECORATED]:
    """A convenience decorator factory for modifying the behavior of user-supplied visitor methods.

    By default, callback methods of transformers/visitors accept one argument - a list of the node's children.

    ``v_args`` can modify this behavior. When used on a transformer/visitor class definition,
    it applies to all the callback methods inside it.

    ``v_args`` can be applied to a single method, or to an entire class. When applied to both,
    the options given to the method take precedence.

    Parameters:
        inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists).
        meta (bool, optional): Provides two arguments: ``meta`` and ``children`` (instead of just the latter)
        tree (bool, optional): Provides the entire tree as the argument, instead of the children.
        wrapper (function, optional): Provide a function to decorate all methods.

    Example:
        ::

            @v_args(inline=True)
            class SolveArith(Transformer):
                def add(self, left, right):
                    return left + right

                @v_args(meta=True)
                def mul(self, meta, children):
                    logger.info(f'mul at line {meta.line}')
                    left, right = children
                    return left * right


            class ReverseNotation(Transformer_InPlace):
                @v_args(tree=True)
                def tree_node(self, tree):
                    tree.children = tree.children[::-1]
    """
    if tree and (meta or inline):
        raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.")

    func = None
    if meta:
        if inline:
            func = _vargs_meta_inline
        else:
            func = _vargs_meta
    elif inline:
        func = _vargs_inline
    elif tree:
        func = _vargs_tree

    if wrapper is not None:
        if func is not None:
            raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.")
        func = wrapper

    def _visitor_args_dec(obj):
        return _apply_v_args(obj, func)
    return _visitor_args_dec
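
# Illustrative sketch, not part of the upstream lark source: the ``wrapper``
# argument of ``v_args`` receives the callback plus ``(data, children, meta)``,
# matching how ``_call_userfunc`` invokes ``visit_wrapper`` above. The names
# below are hypothetical.
def _example_v_args_wrapper():
    calls = []

    def log_and_call(f, data, children, meta):
        # Record which rule fired, then delegate to the original callback.
        calls.append(data)
        return f(children)

    class _LoggedTransformer(Transformer):
        @v_args(wrapper=log_and_call)
        def expr(self, children):
            return children

    _LoggedTransformer().transform(Tree('expr', [1, 2]))
    return calls  # ['expr']

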
###}


# --- Visitor Utilities ---

class CollapseAmbiguities(Transformer):
    """
    Transforms a tree that contains any number of _ambig nodes into a list of trees,
    each one containing an unambiguous tree.

    The length of the resulting list is the product of the lengths of all _ambig nodes.

    Warning: This may quickly explode for highly ambiguous trees.

    """
    def _ambig(self, options):
        return sum(options, [])

    def __default__(self, data, children_lists, meta):
        return [Tree(data, children, meta) for children in combine_alternatives(children_lists)]

    def __default_token__(self, t):
        return [t]
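
# Illustrative sketch, not part of the upstream lark source: given a tree
# containing '_ambig' nodes (as produced when parsing with explicit ambiguity),
# CollapseAmbiguities returns one unambiguous tree per combination of choices.
def _example_collapse_ambiguities():
    ambiguous = Tree('start', [
        Tree('_ambig', [Tree('a', []), Tree('b', [])]),
    ])
    # Two trees: one picking the Tree('a', []) alternative, one picking Tree('b', []).
    return CollapseAmbiguities().transform(ambiguous)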