add

0    ccxt/static_dependencies/lark/parsers/__init__.py    (new file)

340  ccxt/static_dependencies/lark/parsers/cyk.py    (new file)
@@ -0,0 +1,340 @@
"""This module implements a CYK parser."""

# Author: https://github.com/ehudt (2018)
#
# Adapted by Erez


from collections import defaultdict
import itertools

from ..exceptions import ParseError
from ..lexer import Token
from ..tree import Tree
from ..grammar import Terminal as T, NonTerminal as NT, Symbol


def match(t, s):
    assert isinstance(t, T)
    return t.name == s.type


class Rule:
    """Context-free grammar rule."""

    def __init__(self, lhs, rhs, weight, alias):
        super(Rule, self).__init__()
        assert isinstance(lhs, NT), lhs
        assert all(isinstance(x, NT) or isinstance(x, T) for x in rhs), rhs
        self.lhs = lhs
        self.rhs = rhs
        self.weight = weight
        self.alias = alias

    def __str__(self):
        return '%s -> %s' % (str(self.lhs), ' '.join(str(x) for x in self.rhs))

    def __repr__(self):
        return str(self)

    def __hash__(self):
        return hash((self.lhs, tuple(self.rhs)))

    def __eq__(self, other):
        return self.lhs == other.lhs and self.rhs == other.rhs

    def __ne__(self, other):
        return not (self == other)


class Grammar:
    """Context-free grammar."""

    def __init__(self, rules):
        self.rules = frozenset(rules)

    def __eq__(self, other):
        return self.rules == other.rules

    def __str__(self):
        return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n'

    def __repr__(self):
        return str(self)


# Parse tree data structures
class RuleNode:
    """A node in the parse tree, which also contains the full rhs rule."""

    def __init__(self, rule, children, weight=0):
        self.rule = rule
        self.children = children
        self.weight = weight

    def __repr__(self):
        return 'RuleNode(%s, [%s])' % (repr(self.rule.lhs), ', '.join(str(x) for x in self.children))


class Parser:
    """Parser wrapper."""

    def __init__(self, rules):
        super(Parser, self).__init__()
        self.orig_rules = {rule: rule for rule in rules}
        rules = [self._to_rule(rule) for rule in rules]
        self.grammar = to_cnf(Grammar(rules))

    def _to_rule(self, lark_rule):
        """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
        assert isinstance(lark_rule.origin, NT)
        assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
        return Rule(
            lark_rule.origin, lark_rule.expansion,
            weight=lark_rule.options.priority if lark_rule.options.priority else 0,
            alias=lark_rule)

    def parse(self, tokenized, start):  # pylint: disable=invalid-name
        """Parses input, which is a list of tokens."""
        assert start
        start = NT(start)

        table, trees = _parse(tokenized, self.grammar)
        # Check if the parse succeeded.
        if all(r.lhs != start for r in table[(0, len(tokenized) - 1)]):
            raise ParseError('Parsing failed.')
        parse = trees[(0, len(tokenized) - 1)][start]
        return self._to_tree(revert_cnf(parse))

    def _to_tree(self, rule_node):
        """Converts a RuleNode parse tree to a lark Tree."""
        orig_rule = self.orig_rules[rule_node.rule.alias]
        children = []
        for child in rule_node.children:
            if isinstance(child, RuleNode):
                children.append(self._to_tree(child))
            else:
                assert isinstance(child.name, Token)
                children.append(child.name)
        t = Tree(orig_rule.origin, children)
        t.rule = orig_rule
        return t


def print_parse(node, indent=0):
    if isinstance(node, RuleNode):
        print(' ' * (indent * 2) + str(node.rule.lhs))
        for child in node.children:
            print_parse(child, indent + 1)
    else:
        print(' ' * (indent * 2) + str(node.s))


def _parse(s, g):
    """Parses sentence 's' using CNF grammar 'g'."""
    # The CYK table. Indexed with a 2-tuple: (start pos, end pos)
    table = defaultdict(set)
    # Top-level structure is similar to the CYK table. Each cell is a dict from
    # rule name to the best (lightest) tree for that rule.
    trees = defaultdict(dict)
    # Populate base case with existing terminal production rules
    for i, w in enumerate(s):
        for terminal, rules in g.terminal_rules.items():
            if match(terminal, w):
                for rule in rules:
                    table[(i, i)].add(rule)
                    if (rule.lhs not in trees[(i, i)] or
                            rule.weight < trees[(i, i)][rule.lhs].weight):
                        trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)

    # Iterate over lengths of sub-sentences
    for l in range(2, len(s) + 1):
        # Iterate over sub-sentences with the given length
        for i in range(len(s) - l + 1):
            # Choose partition of the sub-sentence in [1, l)
            for p in range(i + 1, i + l):
                span1 = (i, p - 1)
                span2 = (p, i + l - 1)
                for r1, r2 in itertools.product(table[span1], table[span2]):
                    for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
                        table[(i, i + l - 1)].add(rule)
                        r1_tree = trees[span1][r1.lhs]
                        r2_tree = trees[span2][r2.lhs]
                        rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight
                        if (rule.lhs not in trees[(i, i + l - 1)]
                                or rule_total_weight < trees[(i, i + l - 1)][rule.lhs].weight):
                            trees[(i, i + l - 1)][rule.lhs] = RuleNode(rule, [r1_tree, r2_tree], weight=rule_total_weight)
    return table, trees


# This section implements a context-free grammar converter to Chomsky normal form.
# It also implements a conversion of parse trees from its CNF to the original
# grammar.
# Overview:
# Applies the following operations in this order:
# * TERM: Eliminates non-solitary terminals from all rules
# * BIN: Eliminates rules with more than 2 symbols on their right-hand side.
# * UNIT: Eliminates non-terminal unit rules
#
# The following grammar characteristics aren't featured:
# * Start symbol appears on RHS
# * Empty rules (epsilon rules)
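#
# Worked example (illustrative comment, not part of the upstream source;
# the generated names are shown schematically -- the real code embeds full
# symbol reprs in the '__T_'/'__SP_' names):
#   s -> A B C    becomes, after TERM:
#     s -> __T_A __T_B __T_C,   __T_A -> A,   __T_B -> B,   __T_C -> C
#   and after BIN:
#     s -> __T_A __SP_1,   __SP_1 -> __T_B __T_C
#   UNIT then has nothing to remove here; revert_cnf() below undoes both
#   rewrites by matching the '__T_' and '__SP_' name prefixes.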


class CnfWrapper:
    """CNF wrapper for grammar.

    Validates that the input grammar is CNF and provides helper data structures.
    """

    def __init__(self, grammar):
        super(CnfWrapper, self).__init__()
        self.grammar = grammar
        self.rules = grammar.rules
        self.terminal_rules = defaultdict(list)
        self.nonterminal_rules = defaultdict(list)
        for r in self.rules:
            # Validate that the grammar is CNF and populate auxiliary data structures.
            assert isinstance(r.lhs, NT), r
            if len(r.rhs) not in [1, 2]:
                raise ParseError("CYK doesn't support empty rules")
            if len(r.rhs) == 1 and isinstance(r.rhs[0], T):
                self.terminal_rules[r.rhs[0]].append(r)
            elif len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs):
                self.nonterminal_rules[tuple(r.rhs)].append(r)
            else:
                assert False, r

    def __eq__(self, other):
        return self.grammar == other.grammar

    def __repr__(self):
        return repr(self.grammar)


class UnitSkipRule(Rule):
    """A rule that records NTs that were skipped during transformation."""

    def __init__(self, lhs, rhs, skipped_rules, weight, alias):
        super(UnitSkipRule, self).__init__(lhs, rhs, weight, alias)
        self.skipped_rules = skipped_rules

    def __eq__(self, other):
        return isinstance(other, type(self)) and self.skipped_rules == other.skipped_rules

    __hash__ = Rule.__hash__


def build_unit_skiprule(unit_rule, target_rule):
    skipped_rules = []
    if isinstance(unit_rule, UnitSkipRule):
        skipped_rules += unit_rule.skipped_rules
    skipped_rules.append(target_rule)
    if isinstance(target_rule, UnitSkipRule):
        skipped_rules += target_rule.skipped_rules
    return UnitSkipRule(unit_rule.lhs, target_rule.rhs, skipped_rules,
                        weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias)


def get_any_nt_unit_rule(g):
    """Returns a non-terminal unit rule from 'g', or None if there is none."""
    for rule in g.rules:
        if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT):
            return rule
    return None


def _remove_unit_rule(g, rule):
    """Removes 'rule' from 'g' without changing the language produced by 'g'."""
    new_rules = [x for x in g.rules if x != rule]
    refs = [x for x in g.rules if x.lhs == rule.rhs[0]]
    new_rules += [build_unit_skiprule(rule, ref) for ref in refs]
    return Grammar(new_rules)


def _split(rule):
    """Splits a rule whose len(rhs) > 2 into shorter rules."""
    rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
    rule_name = '__SP_%s' % (rule_str) + '_%d'
    yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
    for i in range(1, len(rule.rhs) - 2):
        yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
    yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')


def _term(g):
    """Applies the TERM rule on 'g' (see top comment)."""
    all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
    t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
    new_rules = []
    for rule in g.rules:
        if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs):
            new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs]
            new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
            new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
        else:
            new_rules.append(rule)
    return Grammar(new_rules)


def _bin(g):
    """Applies the BIN rule to 'g' (see top comment)."""
    new_rules = []
    for rule in g.rules:
        if len(rule.rhs) > 2:
            new_rules += _split(rule)
        else:
            new_rules.append(rule)
    return Grammar(new_rules)


def _unit(g):
    """Applies the UNIT rule to 'g' (see top comment)."""
    nt_unit_rule = get_any_nt_unit_rule(g)
    while nt_unit_rule:
        g = _remove_unit_rule(g, nt_unit_rule)
        nt_unit_rule = get_any_nt_unit_rule(g)
    return g


def to_cnf(g):
    """Creates a CNF grammar from a general context-free grammar 'g'."""
    g = _unit(_bin(_term(g)))
    return CnfWrapper(g)


def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias):
    if not skipped_rules:
        return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight)
    else:
        weight = weight - skipped_rules[0].weight
        return RuleNode(
            Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [
                unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs,
                                     skipped_rules[1:], children,
                                     skipped_rules[0].weight, skipped_rules[0].alias)
            ], weight=weight)


def revert_cnf(node):
    """Reverts a parse tree (RuleNode) to its original non-CNF form (Node)."""
    if isinstance(node, T):
        return node
    # Reverts TERM rule.
    if node.rule.lhs.name.startswith('__T_'):
        return node.children[0]
    else:
        children = []
        for child in map(revert_cnf, node.children):
            # Reverts BIN rule.
            if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
                children += child.children
            else:
                children.append(child)
        # Reverts UNIT rule.
        if isinstance(node.rule, UnitSkipRule):
            return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs,
                                        node.rule.skipped_rules, children,
                                        node.rule.weight, node.rule.alias)
        else:
            return RuleNode(node.rule, children)
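A minimal sketch of driving cyk.py by hand (illustrative only, not part of the commit). It assumes a stock lark install for the import paths -- in ccxt the same modules resolve under ccxt.static_dependencies.lark -- and uses a stand-in token class, since match() only reads a token's .type attribute:

from lark.grammar import Terminal as T, NonTerminal as NT
from lark.parsers.cyk import Rule, Grammar, to_cnf, _parse, revert_cnf

class Tok:                       # minimal stand-in; match() only needs .type
    def __init__(self, type_):
        self.type = type_
    def __repr__(self):
        return self.type

# One rule with a 3-symbol body, so to_cnf must apply both TERM and BIN.
rules = [Rule(NT('s'), [T('A'), T('A'), T('A')], weight=0, alias='s3')]
cnf = to_cnf(Grammar(rules))

tokens = [Tok('A'), Tok('A'), Tok('A')]
table, trees = _parse(tokens, cnf)
best = trees[(0, len(tokens) - 1)][NT('s')]   # lightest tree spanning the whole input
print(revert_cnf(best))                       # RuleNode with the CNF helper rules undone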
314  ccxt/static_dependencies/lark/parsers/earley.py    (new file)
@@ -0,0 +1,314 @@
"""This module implements an Earley parser.

The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
    https://www.sciencedirect.com/science/article/pii/S1571066108001497

That is probably the best reference for understanding the algorithm here.

The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html
"""

from typing import TYPE_CHECKING, Callable, Optional, List, Any
from collections import deque

from ..lexer import Token
from ..tree import Tree
from ..exceptions import UnexpectedEOF, UnexpectedToken
from ..utils import logger, OrderedSet, dedup_list
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item
from .earley_forest import ForestSumVisitor, SymbolNode, StableSymbolNode, TokenNode, ForestToParseTree

if TYPE_CHECKING:
    from ..common import LexerConf, ParserConf

class Parser:
    lexer_conf: 'LexerConf'
    parser_conf: 'ParserConf'
    debug: bool

    def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable,
                 resolve_ambiguity: bool=True, debug: bool=False,
                 tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True):
        analysis = GrammarAnalyzer(parser_conf)
        self.lexer_conf = lexer_conf
        self.parser_conf = parser_conf
        self.resolve_ambiguity = resolve_ambiguity
        self.debug = debug
        self.Tree = tree_class
        self.Set = OrderedSet if ordered_sets else set
        self.SymbolNode = StableSymbolNode if ordered_sets else SymbolNode

        self.FIRST = analysis.FIRST
        self.NULLABLE = analysis.NULLABLE
        self.callbacks = parser_conf.callbacks
        # TODO add typing info
        self.predictions = {}   # type: ignore[var-annotated]

        ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
        #  the slow 'isupper' in is_terminal.
        self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
        self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }

        self.forest_sum_visitor = None
        for rule in parser_conf.rules:
            if rule.origin not in self.predictions:
                self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]

            ## Detect if any rules/terminals have priorities set. If the user specified priority = None, then
            #  the priorities will be stripped from all rules/terminals before they reach us, allowing us to
            #  skip the extra tree walk. We'll also skip this if the user just didn't specify priorities
            #  on any rules/terminals.
            if self.forest_sum_visitor is None and rule.options.priority is not None:
                self.forest_sum_visitor = ForestSumVisitor

        # Check terminals for priorities
        # Ignore terminal priorities if the basic lexer is used
        if self.lexer_conf.lexer_type != 'basic' and self.forest_sum_visitor is None:
            for term in self.lexer_conf.terminals:
                if term.priority:
                    self.forest_sum_visitor = ForestSumVisitor
                    break

        self.term_matcher = term_matcher


    def predict_and_complete(self, i, to_scan, columns, transitives):
        """The core Earley Predictor and Completer.

        At each stage of the input, we handle any completed items (things
        that matched on the last cycle) and use those to predict what should
        come next in the input stream. The completions and any predicted
        non-terminals are recursively processed until we reach a set of items
        expecting terminals, which can be added to the scan list for the next
        scanner cycle."""
        # Held Completions (H in E. Scott's paper).
        node_cache = {}
        held_completions = {}

        column = columns[i]
        # R (items) = Ei (column.items)
        items = deque(column)
        while items:
            item = items.pop()    # remove an element, A say, from R

            ### The Earley completer
            if item.is_complete:   ### (item.s == string)
                if item.node is None:
                    label = (item.s, item.start, i)
                    item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
                    item.node.add_family(item.s, item.rule, item.start, None, None)

                # create_leo_transitives(item.rule.origin, item.start)

                ###R Joop Leo right recursion Completer
                if item.rule.origin in transitives[item.start]:
                    transitive = transitives[item.start][item.s]
                    if transitive.previous in transitives[transitive.column]:
                        root_transitive = transitives[transitive.column][transitive.previous]
                    else:
                        root_transitive = transitive

                    new_item = Item(transitive.rule, transitive.ptr, transitive.start)
                    label = (root_transitive.s, root_transitive.start, i)
                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
                    new_item.node.add_path(root_transitive, item.node)
                    if new_item.expect in self.TERMINALS:
                        # Add (B :: aC.B, h, y) to Q
                        to_scan.add(new_item)
                    elif new_item not in column:
                        # Add (B :: aC.B, h, y) to Ei and R
                        column.add(new_item)
                        items.append(new_item)
                ###R Regular Earley completer
                else:
                    # Empty has 0 length. If we complete an empty symbol in a particular
                    # parse step, we need to be able to use that same empty symbol to complete
                    # any predictions that result, that themselves require empty. Avoids
                    # infinite recursion on empty symbols.
                    # held_completions is 'H' in E. Scott's paper.
                    is_empty_item = item.start == i
                    if is_empty_item:
                        held_completions[item.rule.origin] = item.node

                    originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
                    for originator in originators:
                        new_item = originator.advance()
                        label = (new_item.s, originator.start, i)
                        new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
                        new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
                        if new_item.expect in self.TERMINALS:
                            # Add (B :: aC.B, h, y) to Q
                            to_scan.add(new_item)
                        elif new_item not in column:
                            # Add (B :: aC.B, h, y) to Ei and R
                            column.add(new_item)
                            items.append(new_item)

            ### The Earley predictor
            elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
                new_items = []
                for rule in self.predictions[item.expect]:
                    new_item = Item(rule, 0, i)
                    new_items.append(new_item)

                # Process any held completions (H).
                if item.expect in held_completions:
                    new_item = item.advance()
                    label = (new_item.s, item.start, i)
                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
                    new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
                    new_items.append(new_item)

                for new_item in new_items:
                    if new_item.expect in self.TERMINALS:
                        to_scan.add(new_item)
                    elif new_item not in column:
                        column.add(new_item)
                        items.append(new_item)

    def _parse(self, lexer, columns, to_scan, start_symbol=None):

        def is_quasi_complete(item):
            if item.is_complete:
                return True

            quasi = item.advance()
            while not quasi.is_complete:
                if quasi.expect not in self.NULLABLE:
                    return False
                if quasi.rule.origin == start_symbol and quasi.expect == start_symbol:
                    return False
                quasi = quasi.advance()
            return True

        # def create_leo_transitives(origin, start):
        #     ...  # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420

        def scan(i, token, to_scan):
            """The core Earley Scanner.

            This is a custom implementation of the scanner that uses the
            Lark lexer to match tokens. The scan list is built by the
            Earley predictor, based on the previously completed tokens.
            This ensures that at each phase of the parse we have a custom
            lexer context, allowing for more complex ambiguities."""
            next_to_scan = self.Set()
            next_set = self.Set()
            columns.append(next_set)
            transitives.append({})
            node_cache = {}

            for item in self.Set(to_scan):
                if match(item.expect, token):
                    new_item = item.advance()
                    label = (new_item.s, new_item.start, i)
                    # 'terminals' may not contain token.type when using %declare
                    # Additionally, token is not always a Token
                    # For example, it can be a Tree when using TreeMatcher
                    term = terminals.get(token.type) if isinstance(token, Token) else None
                    # Set the priority of the token node to 0 so that the
                    # terminal priorities do not affect the Tree chosen by
                    # ForestSumVisitor after the basic lexer has already
                    # "used up" the terminal priorities
                    token_node = TokenNode(token, term, priority=0)
                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
                    new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)

                    if new_item.expect in self.TERMINALS:
                        # add (B ::= Aai+1.B, h, y) to Q'
                        next_to_scan.add(new_item)
                    else:
                        # add (B ::= Aa+1.B, h, y) to Ei+1
                        next_set.add(new_item)

            if not next_set and not next_to_scan:
                expect = {i.expect.name for i in to_scan}
                raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))

            return next_to_scan


        # Define parser functions
        match = self.term_matcher

        terminals = self.lexer_conf.terminals_by_name

        # Cache for nodes & tokens created in a particular parse step.
        transitives = [{}]

        ## The main Earley loop.
        # Run the Prediction/Completion cycle for any Items in the current Earley set.
        # Completions will be added to the SPPF tree, and predictions will be recursively
        # processed down to terminals/empty nodes to be added to the scanner for the next
        # step.
        expects = {i.expect for i in to_scan}
        i = 0
        for token in lexer.lex(expects):
            self.predict_and_complete(i, to_scan, columns, transitives)

            to_scan = scan(i, token, to_scan)
            i += 1

            expects.clear()
            expects |= {i.expect for i in to_scan}

        self.predict_and_complete(i, to_scan, columns, transitives)

        ## Column is now the final column in the parse.
        assert i == len(columns)-1
        return to_scan

    def parse(self, lexer, start):
        assert start, start
        start_symbol = NonTerminal(start)

        columns = [self.Set()]
        to_scan = self.Set()     # The scan buffer. 'Q' in E. Scott's paper.

        ## Predict for the start_symbol.
        # Add predicted items to the first Earley set (for the predictor) if they
        # result in a non-terminal, or the scanner if they result in a terminal.
        for rule in self.predictions[start_symbol]:
            item = Item(rule, 0, 0)
            if item.expect in self.TERMINALS:
                to_scan.add(item)
            else:
                columns[0].add(item)

        to_scan = self._parse(lexer, columns, to_scan, start_symbol)

        # If the parse was successful, the start
        # symbol should have been completed in the last step of the Earley cycle, and will be in
        # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
        solutions = dedup_list(n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0)
        if not solutions:
            expected_terminals = [t.expect.name for t in to_scan]
            raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))

        if self.debug:
            from .earley_forest import ForestToPyDotVisitor
            try:
                debug_walker = ForestToPyDotVisitor()
            except ImportError:
                logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
            else:
                for i, s in enumerate(solutions):
                    debug_walker.visit(s, f"sppf{i}.png")


        if self.Tree is not None:
            # Perform our SPPF -> AST conversion
            transformer = ForestToParseTree(self.Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity)
            solutions = [transformer.transform(s) for s in solutions]

            if len(solutions) > 1:
                t: Tree = self.Tree('_ambig', solutions)
                t.expand_kids_by_data('_ambig')     # solutions may themselves be _ambig nodes
                return t
            return solutions[0]

        # return the root of the SPPF
        # TODO return a list of solutions, or join them together somehow
        return solutions[0]
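An end-to-end sketch of what this parser produces (illustrative only, not part of the commit), exercising it through lark's public API with an ambiguous grammar. With ambiguity='explicit', the _ambig handling at the end of parse() above becomes visible in the output tree (in ccxt the import would resolve via ccxt.static_dependencies.lark):

from lark import Lark

grammar = """
    start: ab
    ab: "a" "b"   -> joined
      | a b       -> split
    a: "a"
    b: "b"
"""
parser = Lark(grammar, parser='earley', ambiguity='explicit')
print(parser.parse("ab").pretty())   # contains an _ambig node holding both derivations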
42   ccxt/static_dependencies/lark/parsers/earley_common.py    (new file)
@@ -0,0 +1,42 @@
"""This module implements useful building blocks for the Earley parser
"""


class Item:
    "An Earley Item, the atom of the algorithm."

    __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
    def __init__(self, rule, ptr, start):
        self.is_complete = len(rule.expansion) == ptr
        self.rule = rule    # rule
        self.ptr = ptr      # ptr
        self.start = start  # j
        self.node = None    # w
        if self.is_complete:
            self.s = rule.origin
            self.expect = None
            self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
        else:
            self.s = (rule, ptr)
            self.expect = rule.expansion[ptr]
            self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
        self._hash = hash((self.s, self.start, self.rule))

    def advance(self):
        return Item(self.rule, self.ptr + 1, self.start)

    def __eq__(self, other):
        return self is other or (self.s == other.s and self.start == other.start and self.rule == other.rule)

    def __hash__(self):
        return self._hash

    def __repr__(self):
        before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
        after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
        symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
        return '%s (%d)' % (symbol, self.start)


# class TransitiveItem(Item):
#     ...  # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
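A small sketch of the dotted-rule mechanics above (illustrative only, not part of the commit), assuming lark's Rule/NonTerminal/Terminal classes for the rule object:

from lark.grammar import Rule, NonTerminal, Terminal
from lark.parsers.earley_common import Item

rule = Rule(NonTerminal('sum'), [NonTerminal('sum'), Terminal('PLUS'), NonTerminal('atom')])
item = Item(rule, ptr=0, start=0)          # prints as: sum ::= * sum PLUS atom (0)
while not item.is_complete:
    print(item, '| expects:', item.expect)
    item = item.advance()                  # moves the dot one symbol to the right
print(item, '| complete; item.s is now the origin:', item.s)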
801  ccxt/static_dependencies/lark/parsers/earley_forest.py    (new file)
@@ -0,0 +1,801 @@
|
||||
""""This module implements an SPPF implementation
|
||||
|
||||
This is used as the primary output mechanism for the Earley parser
|
||||
in order to store complex ambiguities.
|
||||
|
||||
Full reference and more details is here:
|
||||
https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
|
||||
"""
|
||||
|
||||
from typing import Type, AbstractSet
|
||||
from random import randint
|
||||
from collections import deque
|
||||
from operator import attrgetter
|
||||
from importlib import import_module
|
||||
from functools import partial
|
||||
|
||||
from ..parse_tree_builder import AmbiguousIntermediateExpander
|
||||
from ..visitors import Discard
|
||||
from ..utils import logger, OrderedSet
|
||||
from ..tree import Tree
|
||||
|
||||
class ForestNode:
|
||||
pass
|
||||
|
||||
class SymbolNode(ForestNode):
|
||||
"""
|
||||
A Symbol Node represents a symbol (or Intermediate LR0).
|
||||
|
||||
Symbol nodes are keyed by the symbol (s). For intermediate nodes
|
||||
s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
|
||||
nodes, s will be a string representing the non-terminal origin (i.e.
|
||||
the left hand side of the rule).
|
||||
|
||||
The children of a Symbol or Intermediate Node will always be Packed Nodes;
|
||||
with each Packed Node child representing a single derivation of a production.
|
||||
|
||||
Hence a Symbol Node with a single child is unambiguous.
|
||||
|
||||
Parameters:
|
||||
s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
|
||||
start: For dynamic lexers, the index of the start of the substring matched by this symbol (inclusive).
|
||||
end: For dynamic lexers, the index of the end of the substring matched by this symbol (exclusive).
|
||||
|
||||
Properties:
|
||||
is_intermediate: True if this node is an intermediate node.
|
||||
priority: The priority of the node's symbol.
|
||||
"""
|
||||
Set: Type[AbstractSet] = set # Overridden by StableSymbolNode
|
||||
__slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate')
|
||||
def __init__(self, s, start, end):
|
||||
self.s = s
|
||||
self.start = start
|
||||
self.end = end
|
||||
self._children = self.Set()
|
||||
self.paths = self.Set()
|
||||
self.paths_loaded = False
|
||||
|
||||
### We use inf here as it can be safely negated without resorting to conditionals,
|
||||
# unlike None or float('NaN'), and sorts appropriately.
|
||||
self.priority = float('-inf')
|
||||
self.is_intermediate = isinstance(s, tuple)
|
||||
|
||||
def add_family(self, lr0, rule, start, left, right):
|
||||
self._children.add(PackedNode(self, lr0, rule, start, left, right))
|
||||
|
||||
def add_path(self, transitive, node):
|
||||
self.paths.add((transitive, node))
|
||||
|
||||
def load_paths(self):
|
||||
for transitive, node in self.paths:
|
||||
if transitive.next_titem is not None:
|
||||
vn = type(self)(transitive.next_titem.s, transitive.next_titem.start, self.end)
|
||||
vn.add_path(transitive.next_titem, node)
|
||||
self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
|
||||
else:
|
||||
self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
|
||||
self.paths_loaded = True
|
||||
|
||||
@property
|
||||
def is_ambiguous(self):
|
||||
"""Returns True if this node is ambiguous."""
|
||||
return len(self.children) > 1
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
"""Returns a list of this node's children sorted from greatest to
|
||||
least priority."""
|
||||
if not self.paths_loaded:
|
||||
self.load_paths()
|
||||
return sorted(self._children, key=attrgetter('sort_key'))
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._children)
|
||||
|
||||
def __repr__(self):
|
||||
if self.is_intermediate:
|
||||
rule = self.s[0]
|
||||
ptr = self.s[1]
|
||||
before = ( expansion.name for expansion in rule.expansion[:ptr] )
|
||||
after = ( expansion.name for expansion in rule.expansion[ptr:] )
|
||||
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
|
||||
else:
|
||||
symbol = self.s.name
|
||||
return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
|
||||
|
||||
class StableSymbolNode(SymbolNode):
|
||||
"A version of SymbolNode that uses OrderedSet for output stability"
|
||||
Set = OrderedSet
|
||||
|
||||
class PackedNode(ForestNode):
|
||||
"""
|
||||
A Packed Node represents a single derivation in a symbol node.
|
||||
|
||||
Parameters:
|
||||
rule: The rule associated with this node.
|
||||
parent: The parent of this node.
|
||||
left: The left child of this node. ``None`` if one does not exist.
|
||||
right: The right child of this node. ``None`` if one does not exist.
|
||||
priority: The priority of this node.
|
||||
"""
|
||||
__slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
|
||||
def __init__(self, parent, s, rule, start, left, right):
|
||||
self.parent = parent
|
||||
self.s = s
|
||||
self.start = start
|
||||
self.rule = rule
|
||||
self.left = left
|
||||
self.right = right
|
||||
self.priority = float('-inf')
|
||||
self._hash = hash((self.left, self.right))
|
||||
|
||||
@property
|
||||
def is_empty(self):
|
||||
return self.left is None and self.right is None
|
||||
|
||||
@property
|
||||
def sort_key(self):
|
||||
"""
|
||||
Used to sort PackedNode children of SymbolNodes.
|
||||
A SymbolNode has multiple PackedNodes if it matched
|
||||
ambiguously. Hence, we use the sort order to identify
|
||||
the order in which ambiguous children should be considered.
|
||||
"""
|
||||
return self.is_empty, -self.priority, self.rule.order
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
"""Returns a list of this node's children."""
|
||||
return [x for x in [self.left, self.right] if x is not None]
|
||||
|
||||
def __iter__(self):
|
||||
yield self.left
|
||||
yield self.right
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, PackedNode):
|
||||
return False
|
||||
return self is other or (self.left == other.left and self.right == other.right)
|
||||
|
||||
def __hash__(self):
|
||||
return self._hash
|
||||
|
||||
def __repr__(self):
|
||||
if isinstance(self.s, tuple):
|
||||
rule = self.s[0]
|
||||
ptr = self.s[1]
|
||||
before = ( expansion.name for expansion in rule.expansion[:ptr] )
|
||||
after = ( expansion.name for expansion in rule.expansion[ptr:] )
|
||||
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
|
||||
else:
|
||||
symbol = self.s.name
|
||||
return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
|
||||
|
||||
class TokenNode(ForestNode):
|
||||
"""
|
||||
A Token Node represents a matched terminal and is always a leaf node.
|
||||
|
||||
Parameters:
|
||||
token: The Token associated with this node.
|
||||
term: The TerminalDef matched by the token.
|
||||
priority: The priority of this node.
|
||||
"""
|
||||
__slots__ = ('token', 'term', 'priority', '_hash')
|
||||
def __init__(self, token, term, priority=None):
|
||||
self.token = token
|
||||
self.term = term
|
||||
if priority is not None:
|
||||
self.priority = priority
|
||||
else:
|
||||
self.priority = term.priority if term is not None else 0
|
||||
self._hash = hash(token)
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, TokenNode):
|
||||
return False
|
||||
return self is other or (self.token == other.token)
|
||||
|
||||
def __hash__(self):
|
||||
return self._hash
|
||||
|
||||
def __repr__(self):
|
||||
return repr(self.token)
|
||||
|
||||
class ForestVisitor:
|
||||
"""
|
||||
An abstract base class for building forest visitors.
|
||||
|
||||
This class performs a controllable depth-first walk of an SPPF.
|
||||
The visitor will not enter cycles and will backtrack if one is encountered.
|
||||
Subclasses are notified of cycles through the ``on_cycle`` method.
|
||||
|
||||
Behavior for visit events is defined by overriding the
|
||||
``visit*node*`` functions.
|
||||
|
||||
The walk is controlled by the return values of the ``visit*node_in``
|
||||
methods. Returning a node(s) will schedule them to be visited. The visitor
|
||||
will begin to backtrack if no nodes are returned.
|
||||
|
||||
Parameters:
|
||||
single_visit: If ``True``, non-Token nodes will only be visited once.
|
||||
"""
|
||||
|
||||
def __init__(self, single_visit=False):
|
||||
self.single_visit = single_visit
|
||||
|
||||
def visit_token_node(self, node):
|
||||
"""Called when a ``Token`` is visited. ``Token`` nodes are always leaves."""
|
||||
pass
|
||||
|
||||
def visit_symbol_node_in(self, node):
|
||||
"""Called when a symbol node is visited. Nodes that are returned
|
||||
will be scheduled to be visited. If ``visit_intermediate_node_in``
|
||||
is not implemented, this function will be called for intermediate
|
||||
nodes as well."""
|
||||
pass
|
||||
|
||||
def visit_symbol_node_out(self, node):
|
||||
"""Called after all nodes returned from a corresponding ``visit_symbol_node_in``
|
||||
call have been visited. If ``visit_intermediate_node_out``
|
||||
is not implemented, this function will be called for intermediate
|
||||
nodes as well."""
|
||||
pass
|
||||
|
||||
def visit_packed_node_in(self, node):
|
||||
"""Called when a packed node is visited. Nodes that are returned
|
||||
will be scheduled to be visited. """
|
||||
pass
|
||||
|
||||
def visit_packed_node_out(self, node):
|
||||
"""Called after all nodes returned from a corresponding ``visit_packed_node_in``
|
||||
call have been visited."""
|
||||
pass
|
||||
|
||||
def on_cycle(self, node, path):
|
||||
"""Called when a cycle is encountered.
|
||||
|
||||
Parameters:
|
||||
node: The node that causes a cycle.
|
||||
path: The list of nodes being visited: nodes that have been
|
||||
entered but not exited. The first element is the root in a forest
|
||||
visit, and the last element is the node visited most recently.
|
||||
``path`` should be treated as read-only.
|
||||
"""
|
||||
pass
|
||||
|
||||
def get_cycle_in_path(self, node, path):
|
||||
"""A utility function for use in ``on_cycle`` to obtain a slice of
|
||||
``path`` that only contains the nodes that make up the cycle."""
|
||||
index = len(path) - 1
|
||||
while id(path[index]) != id(node):
|
||||
index -= 1
|
||||
return path[index:]
|
||||
|
||||
def visit(self, root):
|
||||
# Visiting is a list of IDs of all symbol/intermediate nodes currently in
|
||||
# the stack. It serves two purposes: to detect when we 'recurse' in and out
|
||||
# of a symbol/intermediate so that we can process both up and down. Also,
|
||||
# since the SPPF can have cycles it allows us to detect if we're trying
|
||||
# to recurse into a node that's already on the stack (infinite recursion).
|
||||
visiting = set()
|
||||
|
||||
# set of all nodes that have been visited
|
||||
visited = set()
|
||||
|
||||
# a list of nodes that are currently being visited
|
||||
# used for the `on_cycle` callback
|
||||
path = []
|
||||
|
||||
# We do not use recursion here to walk the Forest due to the limited
|
||||
# stack size in python. Therefore input_stack is essentially our stack.
|
||||
input_stack = deque([root])
|
||||
|
||||
# It is much faster to cache these as locals since they are called
|
||||
# many times in large parses.
|
||||
vpno = getattr(self, 'visit_packed_node_out')
|
||||
vpni = getattr(self, 'visit_packed_node_in')
|
||||
vsno = getattr(self, 'visit_symbol_node_out')
|
||||
vsni = getattr(self, 'visit_symbol_node_in')
|
||||
vino = getattr(self, 'visit_intermediate_node_out', vsno)
|
||||
vini = getattr(self, 'visit_intermediate_node_in', vsni)
|
||||
vtn = getattr(self, 'visit_token_node')
|
||||
oc = getattr(self, 'on_cycle')
|
||||
|
||||
while input_stack:
|
||||
current = next(reversed(input_stack))
|
||||
try:
|
||||
next_node = next(current)
|
||||
except StopIteration:
|
||||
input_stack.pop()
|
||||
continue
|
||||
except TypeError:
|
||||
### If the current object is not an iterator, pass through to Token/SymbolNode
|
||||
pass
|
||||
else:
|
||||
if next_node is None:
|
||||
continue
|
||||
|
||||
if id(next_node) in visiting:
|
||||
oc(next_node, path)
|
||||
continue
|
||||
|
||||
input_stack.append(next_node)
|
||||
continue
|
||||
|
||||
if isinstance(current, TokenNode):
|
||||
vtn(current.token)
|
||||
input_stack.pop()
|
||||
continue
|
||||
|
||||
current_id = id(current)
|
||||
if current_id in visiting:
|
||||
if isinstance(current, PackedNode):
|
||||
vpno(current)
|
||||
elif current.is_intermediate:
|
||||
vino(current)
|
||||
else:
|
||||
vsno(current)
|
||||
input_stack.pop()
|
||||
path.pop()
|
||||
visiting.remove(current_id)
|
||||
visited.add(current_id)
|
||||
elif self.single_visit and current_id in visited:
|
||||
input_stack.pop()
|
||||
else:
|
||||
visiting.add(current_id)
|
||||
path.append(current)
|
||||
if isinstance(current, PackedNode):
|
||||
next_node = vpni(current)
|
||||
elif current.is_intermediate:
|
||||
next_node = vini(current)
|
||||
else:
|
||||
next_node = vsni(current)
|
||||
if next_node is None:
|
||||
continue
|
||||
|
||||
if not isinstance(next_node, ForestNode):
|
||||
next_node = iter(next_node)
|
||||
elif id(next_node) in visiting:
|
||||
oc(next_node, path)
|
||||
continue
|
||||
|
||||
input_stack.append(next_node)
|
||||
|
||||
class ForestTransformer(ForestVisitor):
|
||||
"""The base class for a bottom-up forest transformation. Most users will
|
||||
want to use ``TreeForestTransformer`` instead as it has a friendlier
|
||||
interface and covers most use cases.
|
||||
|
||||
Transformations are applied via inheritance and overriding of the
|
||||
``transform*node`` methods.
|
||||
|
||||
``transform_token_node`` receives a ``Token`` as an argument.
|
||||
All other methods receive the node that is being transformed and
|
||||
a list of the results of the transformations of that node's children.
|
||||
The return value of these methods are the resulting transformations.
|
||||
|
||||
If ``Discard`` is raised in a node's transformation, no data from that node
|
||||
will be passed to its parent's transformation.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(ForestTransformer, self).__init__()
|
||||
# results of transformations
|
||||
self.data = dict()
|
||||
# used to track parent nodes
|
||||
self.node_stack = deque()
|
||||
|
||||
def transform(self, root):
|
||||
"""Perform a transformation on an SPPF."""
|
||||
self.node_stack.append('result')
|
||||
self.data['result'] = []
|
||||
self.visit(root)
|
||||
assert len(self.data['result']) <= 1
|
||||
if self.data['result']:
|
||||
return self.data['result'][0]
|
||||
|
||||
def transform_symbol_node(self, node, data):
|
||||
"""Transform a symbol node."""
|
||||
return node
|
||||
|
||||
def transform_intermediate_node(self, node, data):
|
||||
"""Transform an intermediate node."""
|
||||
return node
|
||||
|
||||
def transform_packed_node(self, node, data):
|
||||
"""Transform a packed node."""
|
||||
return node
|
||||
|
||||
def transform_token_node(self, node):
|
||||
"""Transform a ``Token``."""
|
||||
return node
|
||||
|
||||
def visit_symbol_node_in(self, node):
|
||||
self.node_stack.append(id(node))
|
||||
self.data[id(node)] = []
|
||||
return node.children
|
||||
|
||||
def visit_packed_node_in(self, node):
|
||||
self.node_stack.append(id(node))
|
||||
self.data[id(node)] = []
|
||||
return node.children
|
||||
|
||||
def visit_token_node(self, node):
|
||||
transformed = self.transform_token_node(node)
|
||||
if transformed is not Discard:
|
||||
self.data[self.node_stack[-1]].append(transformed)
|
||||
|
||||
def _visit_node_out_helper(self, node, method):
|
||||
self.node_stack.pop()
|
||||
transformed = method(node, self.data[id(node)])
|
||||
if transformed is not Discard:
|
||||
self.data[self.node_stack[-1]].append(transformed)
|
||||
del self.data[id(node)]
|
||||
|
||||
def visit_symbol_node_out(self, node):
|
||||
self._visit_node_out_helper(node, self.transform_symbol_node)
|
||||
|
||||
def visit_intermediate_node_out(self, node):
|
||||
self._visit_node_out_helper(node, self.transform_intermediate_node)
|
||||
|
||||
def visit_packed_node_out(self, node):
|
||||
self._visit_node_out_helper(node, self.transform_packed_node)
|
||||
|
||||
|
||||
class ForestSumVisitor(ForestVisitor):
|
||||
"""
|
||||
A visitor for prioritizing ambiguous parts of the Forest.
|
||||
|
||||
This visitor is used when support for explicit priorities on
|
||||
rules is requested (whether normal, or invert). It walks the
|
||||
forest (or subsets thereof) and cascades properties upwards
|
||||
from the leaves.
|
||||
|
||||
It would be ideal to do this during parsing, however this would
|
||||
require processing each Earley item multiple times. That's
|
||||
a big performance drawback; so running a forest walk is the
|
||||
lesser of two evils: there can be significantly more Earley
|
||||
items created during parsing than there are SPPF nodes in the
|
||||
final tree.
|
||||
"""
|
||||
def __init__(self):
|
||||
super(ForestSumVisitor, self).__init__(single_visit=True)
|
||||
|
||||
def visit_packed_node_in(self, node):
|
||||
yield node.left
|
||||
yield node.right
|
||||
|
||||
def visit_symbol_node_in(self, node):
|
||||
return iter(node.children)
|
||||
|
||||
def visit_packed_node_out(self, node):
|
||||
priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0
|
||||
priority += getattr(node.right, 'priority', 0)
|
||||
priority += getattr(node.left, 'priority', 0)
|
||||
node.priority = priority
|
||||
|
||||
def visit_symbol_node_out(self, node):
|
||||
node.priority = max(child.priority for child in node.children)
|
||||
|
||||
class PackedData():
|
||||
"""Used in transformationss of packed nodes to distinguish the data
|
||||
that comes from the left child and the right child.
|
||||
"""
|
||||
|
||||
class _NoData():
|
||||
pass
|
||||
|
||||
NO_DATA = _NoData()
|
||||
|
||||
def __init__(self, node, data):
|
||||
self.left = self.NO_DATA
|
||||
self.right = self.NO_DATA
|
||||
if data:
|
||||
if node.left is not None:
|
||||
self.left = data[0]
|
||||
if len(data) > 1:
|
||||
self.right = data[1]
|
||||
else:
|
||||
self.right = data[0]
|
||||
|
||||
class ForestToParseTree(ForestTransformer):
|
||||
"""Used by the earley parser when ambiguity equals 'resolve' or
|
||||
'explicit'. Transforms an SPPF into an (ambiguous) parse tree.
|
||||
|
||||
Parameters:
|
||||
tree_class: The tree class to use for construction
|
||||
callbacks: A dictionary of rules to functions that output a tree
|
||||
prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes
|
||||
resolve_ambiguity: If True, ambiguities will be resolved based on
|
||||
priorities. Otherwise, `_ambig` nodes will be in the resulting tree.
|
||||
use_cache: If True, the results of packed node transformations will be cached.
|
||||
"""
|
||||
|
||||
def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
|
||||
super(ForestToParseTree, self).__init__()
|
||||
self.tree_class = tree_class
|
||||
self.callbacks = callbacks
|
||||
self.prioritizer = prioritizer
|
||||
self.resolve_ambiguity = resolve_ambiguity
|
||||
self._use_cache = use_cache
|
||||
self._cache = {}
|
||||
self._on_cycle_retreat = False
|
||||
self._cycle_node = None
|
||||
self._successful_visits = set()
|
||||
|
||||
def visit(self, root):
|
||||
if self.prioritizer:
|
||||
self.prioritizer.visit(root)
|
||||
super(ForestToParseTree, self).visit(root)
|
||||
self._cache = {}
|
||||
|
||||
def on_cycle(self, node, path):
|
||||
logger.debug("Cycle encountered in the SPPF at node: %s. "
|
||||
"As infinite ambiguities cannot be represented in a tree, "
|
||||
"this family of derivations will be discarded.", node)
|
||||
self._cycle_node = node
|
||||
self._on_cycle_retreat = True
|
||||
|
||||
def _check_cycle(self, node):
|
||||
if self._on_cycle_retreat:
|
||||
if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
|
||||
self._cycle_node = None
|
||||
self._on_cycle_retreat = False
|
||||
else:
|
||||
return Discard
|
||||
|
||||
def _collapse_ambig(self, children):
|
||||
new_children = []
|
||||
for child in children:
|
||||
if hasattr(child, 'data') and child.data == '_ambig':
|
||||
new_children += child.children
|
||||
else:
|
||||
new_children.append(child)
|
||||
return new_children
|
||||
|
||||
def _call_rule_func(self, node, data):
|
||||
# called when transforming children of symbol nodes
|
||||
# data is a list of trees or tokens that correspond to the
|
||||
# symbol's rule expansion
|
||||
return self.callbacks[node.rule](data)
|
||||
|
||||
def _call_ambig_func(self, node, data):
|
||||
# called when transforming a symbol node
|
||||
# data is a list of trees where each tree's data is
|
||||
# equal to the name of the symbol or one of its aliases.
|
||||
if len(data) > 1:
|
||||
return self.tree_class('_ambig', data)
|
||||
elif data:
|
||||
return data[0]
|
||||
return Discard
|
||||
|
||||
def transform_symbol_node(self, node, data):
|
||||
if id(node) not in self._successful_visits:
|
||||
return Discard
|
||||
r = self._check_cycle(node)
|
||||
if r is Discard:
|
||||
return r
|
||||
self._successful_visits.remove(id(node))
|
||||
data = self._collapse_ambig(data)
|
||||
return self._call_ambig_func(node, data)
|
||||
|
||||
def transform_intermediate_node(self, node, data):
|
||||
if id(node) not in self._successful_visits:
|
||||
return Discard
|
||||
r = self._check_cycle(node)
|
||||
if r is Discard:
|
||||
return r
|
||||
self._successful_visits.remove(id(node))
|
||||
if len(data) > 1:
|
||||
children = [self.tree_class('_inter', c) for c in data]
|
||||
return self.tree_class('_iambig', children)
|
||||
return data[0]
|
||||
|
||||
def transform_packed_node(self, node, data):
|
||||
r = self._check_cycle(node)
|
||||
if r is Discard:
|
||||
return r
|
||||
if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
|
||||
return Discard
|
||||
if self._use_cache and id(node) in self._cache:
|
||||
return self._cache[id(node)]
|
||||
children = []
|
||||
assert len(data) <= 2
|
||||
data = PackedData(node, data)
|
||||
if data.left is not PackedData.NO_DATA:
|
||||
if node.left.is_intermediate and isinstance(data.left, list):
|
||||
children += data.left
|
||||
else:
|
||||
children.append(data.left)
|
||||
if data.right is not PackedData.NO_DATA:
|
||||
children.append(data.right)
|
||||
if node.parent.is_intermediate:
|
||||
return self._cache.setdefault(id(node), children)
|
||||
return self._cache.setdefault(id(node), self._call_rule_func(node, children))
|
||||
|
||||
def visit_symbol_node_in(self, node):
|
||||
super(ForestToParseTree, self).visit_symbol_node_in(node)
|
||||
if self._on_cycle_retreat:
|
||||
return
|
||||
return node.children
|
||||
|
||||
def visit_packed_node_in(self, node):
|
||||
self._on_cycle_retreat = False
|
||||
to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
|
||||
if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
|
||||
if not self._use_cache or id(node) not in self._cache:
|
||||
return to_visit
|
||||
|
||||
def visit_packed_node_out(self, node):
|
||||
super(ForestToParseTree, self).visit_packed_node_out(node)
|
||||
if not self._on_cycle_retreat:
|
||||
self._successful_visits.add(id(node.parent))
|
||||
|
||||
def handles_ambiguity(func):
|
||||
"""Decorator for methods of subclasses of ``TreeForestTransformer``.
|
||||
Denotes that the method should receive a list of transformed derivations."""
|
||||
func.handles_ambiguity = True
|
||||
return func
|
||||
|
||||
class TreeForestTransformer(ForestToParseTree):
|
||||
"""A ``ForestTransformer`` with a tree ``Transformer``-like interface.
|
||||
By default, it will construct a tree.
|
||||
|
||||
Methods provided via inheritance are called based on the rule/symbol
|
||||
names of nodes in the forest.
|
||||
|
||||
Methods that act on rules will receive a list of the results of the
|
||||
transformations of the rule's children. By default, trees and tokens.
|
||||
|
||||
Methods that act on tokens will receive a token.
|
||||
|
||||
Alternatively, methods that act on rules may be annotated with
|
||||
``handles_ambiguity``. In this case, the function will receive a list
|
||||
of all the transformations of all the derivations of the rule.
|
||||
By default, a list of trees where each tree.data is equal to the
|
||||
rule name or one of its aliases.
|
||||
|
||||
Non-tree transformations are made possible by override of
|
||||
``__default__``, ``__default_token__``, and ``__default_ambig__``.
|
||||
|
||||
Note:
|
||||
Tree shaping features such as inlined rules and token filtering are
|
||||
not built into the transformation. Positions are also not propagated.
|
||||
|
||||
Parameters:
|
||||
tree_class: The tree class to use for construction
|
||||
prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF.
|
||||
resolve_ambiguity: If True, ambiguities will be resolved based on priorities.
|
||||
use_cache (bool): If True, caches the results of some transformations,
|
||||
potentially improving performance when ``resolve_ambiguity==False``.
|
||||
Only use if you know what you are doing: i.e. All transformation
|
||||
functions are pure and referentially transparent.
|
||||
"""
|
||||
|
||||
def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
|
||||
super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache)
|
||||
|
||||
def __default__(self, name, data):
|
||||
"""Default operation on tree (for override).
|
||||
|
||||
Returns a tree with name with data as children.
|
||||
"""
|
||||
return self.tree_class(name, data)
|
||||
|
||||
def __default_ambig__(self, name, data):
|
||||
"""Default operation on ambiguous rule (for override).
|
||||
|
||||
Wraps data in an '_ambig_' node if it contains more than
|
||||
one element.
|
||||
"""
|
||||
if len(data) > 1:
|
||||
return self.tree_class('_ambig', data)
|
||||
elif data:
|
||||
return data[0]
|
||||
return Discard
|
||||
|
||||
def __default_token__(self, node):
|
||||
"""Default operation on ``Token`` (for override).
|
||||
|
||||
Returns ``node``.
|
||||
"""
|
||||
return node
|
||||
|
||||
def transform_token_node(self, node):
|
||||
return getattr(self, node.type, self.__default_token__)(node)
|
||||
|
||||
def _call_rule_func(self, node, data):
|
||||
name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name
|
||||
user_func = getattr(self, name, self.__default__)
|
||||
if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'):
|
||||
user_func = partial(self.__default__, name)
|
||||
if not self.resolve_ambiguity:
|
||||
wrapper = partial(AmbiguousIntermediateExpander, self.tree_class)
|
||||
user_func = wrapper(user_func)
|
||||
return user_func(data)
|
||||
|
||||
def _call_ambig_func(self, node, data):
|
||||
name = node.s.name
|
||||
user_func = getattr(self, name, self.__default_ambig__)
|
||||
if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'):
|
||||
user_func = partial(self.__default_ambig__, name)
|
||||
return user_func(data)
|
||||
|
||||
class ForestToPyDotVisitor(ForestVisitor):
    """
    A Forest visitor which writes the SPPF to a PNG.

    The SPPF can get really large, really quickly because
    of the amount of meta-data it stores, so this is probably
    only useful for trivial trees and learning how the SPPF
    is structured.
    """
    def __init__(self, rankdir="TB"):
        super(ForestToPyDotVisitor, self).__init__(single_visit=True)
        self.pydot = import_module('pydot')
        self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)

    def visit(self, root, filename):
        super(ForestToPyDotVisitor, self).visit(root)
        try:
            self.graph.write_png(filename)
        except FileNotFoundError as e:
            logger.error("Could not write png: %s", e)

    def visit_token_node(self, node):
        graph_node_id = str(id(node))
        graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
        graph_node_color = 0x808080
        graph_node_style = "\"filled,rounded\""
        graph_node_shape = "diamond"
        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
        self.graph.add_node(graph_node)

    def visit_packed_node_in(self, node):
        graph_node_id = str(id(node))
        graph_node_label = repr(node)
        graph_node_color = 0x808080
        graph_node_style = "filled"
        graph_node_shape = "diamond"
        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
        self.graph.add_node(graph_node)
        yield node.left
        yield node.right

    def visit_packed_node_out(self, node):
        graph_node_id = str(id(node))
        graph_node = self.graph.get_node(graph_node_id)[0]
        for child in [node.left, node.right]:
            if child is not None:
                child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child))
                child_graph_node = self.graph.get_node(child_graph_node_id)[0]
                self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
            else:
                #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
                child_graph_node_id = str(randint(100000000000000000000000000000, 123456789012345678901234567890))
                child_graph_node_style = "invis"
                child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
                child_edge_style = "invis"
                self.graph.add_node(child_graph_node)
                self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))

    def visit_symbol_node_in(self, node):
        graph_node_id = str(id(node))
        graph_node_label = repr(node)
        graph_node_color = 0x808080
        graph_node_style = "\"filled\""
        if node.is_intermediate:
            graph_node_shape = "ellipse"
        else:
            graph_node_shape = "rectangle"
        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
        self.graph.add_node(graph_node)
        return iter(node.children)

    def visit_symbol_node_out(self, node):
        graph_node_id = str(id(node))
        graph_node = self.graph.get_node(graph_node_id)[0]
        for child in node.children:
            child_graph_node_id = str(id(child))
            child_graph_node = self.graph.get_node(child_graph_node_id)[0]
            self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
203
ccxt/static_dependencies/lark/parsers/grammar_analysis.py
Normal file
@@ -0,0 +1,203 @@
"Provides for superficial grammar analysis."

from collections import Counter, defaultdict
from typing import List, Dict, Iterator, FrozenSet, Set

from ..utils import bfs, fzset, classify
from ..exceptions import GrammarError
from ..grammar import Rule, Terminal, NonTerminal, Symbol
from ..common import ParserConf


class RulePtr:
    __slots__ = ('rule', 'index')
    rule: Rule
    index: int

    def __init__(self, rule: Rule, index: int):
        assert isinstance(rule, Rule)
        assert index <= len(rule.expansion)
        self.rule = rule
        self.index = index

    def __repr__(self):
        before = [x.name for x in self.rule.expansion[:self.index]]
        after = [x.name for x in self.rule.expansion[self.index:]]
        return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))

    @property
    def next(self) -> Symbol:
        return self.rule.expansion[self.index]

    def advance(self, sym: Symbol) -> 'RulePtr':
        assert self.next == sym
        return RulePtr(self.rule, self.index+1)

    @property
    def is_satisfied(self) -> bool:
        return self.index == len(self.rule.expansion)

    def __eq__(self, other) -> bool:
        if not isinstance(other, RulePtr):
            return NotImplemented
        return self.rule == other.rule and self.index == other.index

    def __hash__(self) -> int:
        return hash((self.rule, self.index))


State = FrozenSet[RulePtr]

# state generation ensures no duplicate LR0ItemSets
class LR0ItemSet:
    __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')

    kernel: State
    closure: State
    transitions: Dict[Symbol, 'LR0ItemSet']
    lookaheads: Dict[Symbol, Set[Rule]]

    def __init__(self, kernel, closure):
        self.kernel = fzset(kernel)
        self.closure = fzset(closure)
        self.transitions = {}
        self.lookaheads = defaultdict(set)

    def __repr__(self):
        return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))


def update_set(set1, set2):
    if not set2 or set1 > set2:
        return False

    copy = set(set1)
    set1 |= set2
    return set1 != copy
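# A quick illustration (comments only, not executed): update_set grows set1
# in place and reports whether anything new arrived, which is what the
# fixed-point loops below rely on to detect convergence.
#
#     s = {1}
#     update_set(s, {1, 2})   # -> True,  s is now {1, 2}
#     update_set(s, {2})      # -> False, s is unchanged
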
def calculate_sets(rules):
    """Calculate FOLLOW sets.

    Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
    symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}

    # foreach grammar rule X ::= Y(1) ... Y(k)
    # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
    #   NULLABLE = NULLABLE union {X}
    # for i = 1 to k
    #   if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
    #     FIRST(X) = FIRST(X) union FIRST(Y(i))
    #   for j = i+1 to k
    #     if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
    #     if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
    # until none of NULLABLE,FIRST,FOLLOW changed in last iteration

    NULLABLE = set()
    FIRST = {}
    FOLLOW = {}
    for sym in symbols:
        FIRST[sym] = {sym} if sym.is_term else set()
        FOLLOW[sym] = set()

    # Calculate NULLABLE and FIRST
    changed = True
    while changed:
        changed = False

        for rule in rules:
            if set(rule.expansion) <= NULLABLE:
                if update_set(NULLABLE, {rule.origin}):
                    changed = True

            for i, sym in enumerate(rule.expansion):
                if set(rule.expansion[:i]) <= NULLABLE:
                    if update_set(FIRST[rule.origin], FIRST[sym]):
                        changed = True
                else:
                    break

    # Calculate FOLLOW
    changed = True
    while changed:
        changed = False

        for rule in rules:
            for i, sym in enumerate(rule.expansion):
                if i == len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
                    if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
                        changed = True

                for j in range(i+1, len(rule.expansion)):
                    if set(rule.expansion[i+1:j]) <= NULLABLE:
                        if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
                            changed = True

    return FIRST, FOLLOW, NULLABLE
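# Worked example (comments only; the grammar is hypothetical). For
#     s -> a s | 'X'        a -> 'Y' | (empty)
# the fixed point gives:
#     NULLABLE = {a}
#     FIRST(s) = {'X', 'Y'}    ('Y' via the leading a, 'X' directly; since a
#                               is nullable, FIRST(s) also absorbs FIRST of
#                               the following symbol in `a s`)
#     FOLLOW(a) = FIRST(s) = {'X', 'Y'}
# Terminals have FIRST(t) = {t} by the initialization above.
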
class GrammarAnalyzer:
    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
        self.debug = debug
        self.strict = strict

        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
                      for start in parser_conf.start}

        rules = parser_conf.rules + list(root_rules.values())
        self.rules_by_origin: Dict[NonTerminal, List[Rule]] = classify(rules, lambda r: r.origin)

        if len(rules) != len(set(rules)):
            duplicates = [item for item, count in Counter(rules).items() if count > 1]
            raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))

        for r in rules:
            for sym in r.expansion:
                if not (sym.is_term or sym in self.rules_by_origin):
                    raise GrammarError("Using an undefined rule: %s" % sym)

        self.start_states = {start: self.expand_rule(root_rule.origin)
                             for start, root_rule in root_rules.items()}

        self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
                           for start, root_rule in root_rules.items()}

        lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
                          for start in parser_conf.start}

        lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
        assert len(lr0_rules) == len(set(lr0_rules))

        self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                                 for start, root_rule in lr0_root_rules.items()}

        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

    def expand_rule(self, source_rule: NonTerminal, rules_by_origin=None) -> State:
        "Returns all init_ptrs accessible by rule (recursive)"

        if rules_by_origin is None:
            rules_by_origin = self.rules_by_origin

        init_ptrs = set()
        def _expand_rule(rule: NonTerminal) -> Iterator[NonTerminal]:
            assert not rule.is_term, rule

            for r in rules_by_origin[rule]:
                init_ptr = RulePtr(r, 0)
                init_ptrs.add(init_ptr)

                if r.expansion: # if not empty rule
                    new_r = init_ptr.next
                    if not new_r.is_term:
                        assert isinstance(new_r, NonTerminal)
                        yield new_r

        for _ in bfs([source_rule], _expand_rule):
            pass

        return fzset(init_ptrs)
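# Illustration (comments only; the toy rules are hypothetical). Given
#     a -> b c
#     b -> 'B'
# expand_rule(NonTerminal('a')) BFS-expands leading nonterminals and returns
# the frozen closure of all initial items, printed with RulePtr's repr as:
#     { <a : * b c>, <b : * B> }
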
332
ccxt/static_dependencies/lark/parsers/lalr_analysis.py
Normal file
@@ -0,0 +1,332 @@
"""This module builds a LALR(1) transition-table for lalr_parser.py

For now, shift/reduce conflicts are automatically resolved as shifts.
"""

# Author: Erez Shinan (2017)
# Email : erezshin@gmail.com

from typing import Dict, Set, Iterator, Tuple, List, TypeVar, Generic
from collections import defaultdict

from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
from ..exceptions import GrammarError

from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet, RulePtr, State
from ..grammar import Rule, Symbol
from ..common import ParserConf

###{standalone

class Action:
    def __init__(self, name):
        self.name = name
    def __str__(self):
        return self.name
    def __repr__(self):
        return str(self)

Shift = Action('Shift')
Reduce = Action('Reduce')

StateT = TypeVar("StateT")

class ParseTableBase(Generic[StateT]):
    states: Dict[StateT, Dict[str, Tuple]]
    start_states: Dict[str, StateT]
    end_states: Dict[str, StateT]

    def __init__(self, states, start_states, end_states):
        self.states = states
        self.start_states = start_states
        self.end_states = end_states

    def serialize(self, memo):
        tokens = Enumerator()

        states = {
            state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
                    for token, (action, arg) in actions.items()}
            for state, actions in self.states.items()
        }

        return {
            'tokens': tokens.reversed(),
            'states': states,
            'start_states': self.start_states,
            'end_states': self.end_states,
        }

    @classmethod
    def deserialize(cls, data, memo):
        tokens = data['tokens']
        states = {
            state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
                    for token, (action, arg) in actions.items()}
            for state, actions in data['states'].items()
        }
        return cls(states, data['start_states'], data['end_states'])
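# Round-trip sketch (comments only, hedged): actions are packed as
# (0, next_state) for Shift and (1, serialized_rule) for Reduce, with token
# names interned through the Enumerator. Assuming `table` is a ParseTableBase
# and `memo` is the same rule memo used for (de)serialization elsewhere:
#
#     data = table.serialize(memo)               # JSON-friendly dict
#     same = type(table).deserialize(data, memo)
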
class ParseTable(ParseTableBase['State']):
    """Parse-table whose key is State, i.e. set[RulePtr]

    Slower than IntParseTable, but useful for debugging
    """
    pass


class IntParseTable(ParseTableBase[int]):
    """Parse-table whose key is int. Best for performance."""

    @classmethod
    def from_ParseTable(cls, parse_table: ParseTable):
        enum = list(parse_table.states)
        state_to_idx: Dict['State', int] = {s:i for i,s in enumerate(enum)}
        int_states = {}

        for s, la in parse_table.states.items():
            la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
                  for k,v in la.items()}
            int_states[state_to_idx[s]] = la

        start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
        end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
        return cls(int_states, start_states, end_states)

###}


# digraph and traverse, see The Theory and Practice of Compiler Writing

# computes F(x) = G(x) union (union { G(y) | x R y })
# X: nodes
# R: relation (function mapping node -> list of nodes that satisfy the relation)
# G: set valued function
def digraph(X, R, G):
    F = {}
    S = []
    N = dict.fromkeys(X, 0)
    for x in X:
        # this is always true for the first iteration, but N[x] may be updated in traverse below
        if N[x] == 0:
            traverse(x, S, N, X, R, G, F)
    return F
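# Tiny worked example (comments only; nodes and sets are hypothetical).
# With X = ['a', 'b'], R = {'a': ['b'], 'b': []} and G = {'a': {1}, 'b': {2}}:
#
#     digraph(X, R, G)   # -> {'a': {1, 2}, 'b': {2}}
#
# 'a' relates to 'b', so F('a') folds in G('b'); nodes in the same strongly
# connected component end up sharing one merged set.
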
# x: single node
# S: stack
# N: weights
# X: nodes
# R: relation (see above)
# G: set valued function
# F: set valued function we are computing (map of input -> output)
def traverse(x, S, N, X, R, G, F):
    S.append(x)
    d = len(S)
    N[x] = d
    F[x] = G[x]
    for y in R[x]:
        if N[y] == 0:
            traverse(y, S, N, X, R, G, F)
        n_x = N[x]
        assert n_x > 0
        n_y = N[y]
        assert n_y != 0
        if (n_y > 0) and (n_y < n_x):
            N[x] = n_y
        F[x].update(F[y])
    if N[x] == d:
        f_x = F[x]
        while True:
            z = S.pop()
            N[z] = -1
            F[z] = f_x
            if z == x:
                break


class LALR_Analyzer(GrammarAnalyzer):
    lr0_itemsets: Set[LR0ItemSet]
    nonterminal_transitions: List[Tuple[LR0ItemSet, Symbol]]
    lookback: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Rule]]]
    includes: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]]
    reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]]
    directly_reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Symbol]]


    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
        GrammarAnalyzer.__init__(self, parser_conf, debug, strict)
        self.nonterminal_transitions = []
        self.directly_reads = defaultdict(set)
        self.reads = defaultdict(set)
        self.includes = defaultdict(set)
        self.lookback = defaultdict(set)


    def compute_lr0_states(self) -> None:
        self.lr0_itemsets = set()
        # map of kernels to LR0ItemSets
        cache: Dict['State', LR0ItemSet] = {}

        def step(state: LR0ItemSet) -> Iterator[LR0ItemSet]:
            _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)

            d = classify(unsat, lambda rp: rp.next)
            for sym, rps in d.items():
                kernel = fzset({rp.advance(sym) for rp in rps})
                new_state = cache.get(kernel, None)
                if new_state is None:
                    closure = set(kernel)
                    for rp in kernel:
                        if not rp.is_satisfied and not rp.next.is_term:
                            closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
                    new_state = LR0ItemSet(kernel, closure)
                    cache[kernel] = new_state

                state.transitions[sym] = new_state
                yield new_state

            self.lr0_itemsets.add(state)

        for _ in bfs(self.lr0_start_states.values(), step):
            pass

    def compute_reads_relations(self):
        # handle start state
        for root in self.lr0_start_states.values():
            assert len(root.kernel) == 1
            for rp in root.kernel:
                assert rp.index == 0
                self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])

        for state in self.lr0_itemsets:
            seen = set()
            for rp in state.closure:
                if rp.is_satisfied:
                    continue
                s = rp.next
                # if s is not a nonterminal
                if s not in self.lr0_rules_by_origin:
                    continue
                if s in seen:
                    continue
                seen.add(s)
                nt = (state, s)
                self.nonterminal_transitions.append(nt)
                dr = self.directly_reads[nt]
                r = self.reads[nt]
                next_state = state.transitions[s]
                for rp2 in next_state.closure:
                    if rp2.is_satisfied:
                        continue
                    s2 = rp2.next
                    # if s2 is a terminal
                    if s2 not in self.lr0_rules_by_origin:
                        dr.add(s2)
                    if s2 in self.NULLABLE:
                        r.add((next_state, s2))

    def compute_includes_lookback(self):
        for nt in self.nonterminal_transitions:
            state, nonterminal = nt
            includes = []
            lookback = self.lookback[nt]
            for rp in state.closure:
                if rp.rule.origin != nonterminal:
                    continue
                # traverse the states for rp(.rule)
                state2 = state
                for i in range(rp.index, len(rp.rule.expansion)):
                    s = rp.rule.expansion[i]
                    nt2 = (state2, s)
                    state2 = state2.transitions[s]
                    if nt2 not in self.reads:
                        continue
                    for j in range(i + 1, len(rp.rule.expansion)):
                        if rp.rule.expansion[j] not in self.NULLABLE:
                            break
                    else:
                        includes.append(nt2)
                # state2 is at the final state for rp.rule
                if rp.index == 0:
                    for rp2 in state2.closure:
                        if (rp2.rule == rp.rule) and rp2.is_satisfied:
                            lookback.add((state2, rp2.rule))
            for nt2 in includes:
                self.includes[nt2].add(nt)

    def compute_lookaheads(self):
        read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
        follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)

        for nt, lookbacks in self.lookback.items():
            for state, rule in lookbacks:
                for s in follow_sets[nt]:
                    state.lookaheads[s].add(rule)

    def compute_lalr1_states(self) -> None:
        m: Dict[LR0ItemSet, Dict[str, Tuple]] = {}
        reduce_reduce = []
        for itemset in self.lr0_itemsets:
            actions: Dict[Symbol, Tuple] = {la: (Shift, next_state.closure)
                                            for la, next_state in itemset.transitions.items()}
            for la, rules in itemset.lookaheads.items():
                if len(rules) > 1:
                    # Try to resolve conflict based on priority
                    p = [(r.options.priority or 0, r) for r in rules]
                    p.sort(key=lambda r: r[0], reverse=True)
                    best, second_best = p[:2]
                    if best[0] > second_best[0]:
                        rules = {best[1]}
                    else:
                        reduce_reduce.append((itemset, la, rules))
                        continue

                rule, = rules
                if la in actions:
                    if self.strict:
                        raise GrammarError(f"Shift/Reduce conflict for terminal {la.name}. [strict-mode]")
                    elif self.debug:
                        logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
                        logger.warning(' * %s', rule)
                    else:
                        logger.debug('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
                        logger.debug(' * %s', rule)
                else:
                    actions[la] = (Reduce, rule)
            m[itemset] = { k.name: v for k, v in actions.items() }

        if reduce_reduce:
            msgs = []
            for itemset, la, rules in reduce_reduce:
                msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
                if self.debug:
                    msg += '\n    collision occurred in state: {%s\n    }' % ''.join(['\n\t' + str(x) for x in itemset.closure])
                msgs.append(msg)
            raise GrammarError('\n\n'.join(msgs))

        states = { k.closure: v for k, v in m.items() }

        # compute end states
        end_states: Dict[str, 'State'] = {}
        for state in states:
            for rp in state:
                for start in self.lr0_start_states:
                    if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
                        assert start not in end_states
                        end_states[start] = state

        start_states = { start: state.closure for start, state in self.lr0_start_states.items() }
        _parse_table = ParseTable(states, start_states, end_states)

        if self.debug:
            self.parse_table = _parse_table
        else:
            self.parse_table = IntParseTable.from_ParseTable(_parse_table)

    def compute_lalr(self):
        self.compute_lr0_states()
        self.compute_reads_relations()
        self.compute_includes_lookback()
        self.compute_lookaheads()
        self.compute_lalr1_states()
158
ccxt/static_dependencies/lark/parsers/lalr_interactive_parser.py
Normal file
@@ -0,0 +1,158 @@
# This module provides a LALR interactive parser, which is used for debugging and error handling

from typing import Iterator, List
from copy import copy
import warnings

from ..exceptions import UnexpectedToken
from ..lexer import Token, LexerThread
from .lalr_parser_state import ParserState

###{standalone

class InteractiveParser:
    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
    """
    def __init__(self, parser, parser_state: ParserState, lexer_thread: LexerThread):
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_thread = lexer_thread
        self.result = None

    @property
    def lexer_state(self) -> LexerThread:
        warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning)
        return self.lexer_thread

    def feed_token(self, token: Token):
        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

        Note that ``token`` has to be an instance of ``Token``.
        """
        return self.parser_state.feed_token(token, token.type == '$END')

    def iter_parse(self) -> Iterator[Token]:
        """Step through the different stages of the parse, by reading tokens from the lexer
        and feeding them to the parser, one per iteration.

        Returns an iterator of the tokens it encounters.

        When the parse is over, the resulting tree can be found in ``InteractiveParser.result``.
        """
        for token in self.lexer_thread.lex(self.parser_state):
            yield token
            self.result = self.feed_token(token)

    def exhaust_lexer(self) -> List[Token]:
        """Try to feed the rest of the lexer state into the interactive parser.

        Note that this modifies the instance in place and does not feed an '$END' Token
        """
        return list(self.iter_parse())


    def feed_eof(self, last_token=None):
        """Feed a '$END' Token. Borrows from 'last_token' if given."""
        eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1)
        return self.feed_token(eof)


    def __copy__(self):
        """Create a new interactive parser with a separate state.

        Calls to feed_token() won't affect the old instance, and vice-versa.
        """
        return self.copy()

    def copy(self, deepcopy_values=True):
        return type(self)(
            self.parser,
            self.parser_state.copy(deepcopy_values=deepcopy_values),
            copy(self.lexer_thread),
        )

    def __eq__(self, other):
        if not isinstance(other, InteractiveParser):
            return False

        return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread

    def as_immutable(self):
        """Convert to an ``ImmutableInteractiveParser``."""
        p = copy(self)
        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread)

    def pretty(self):
        """Return the output of ``choices()`` in a way that's easier to read."""
        out = ["Parser choices:"]
        for k, v in self.choices().items():
            out.append('\t- %s -> %r' % (k, v))
        out.append('stack size: %s' % len(self.parser_state.state_stack))
        return '\n'.join(out)

    def choices(self):
        """Returns a dictionary of token types, matched to their action in the parser.

        Only returns token types that are accepted by the current state.

        Updated by ``feed_token()``.
        """
        return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

    def accepts(self):
        """Returns the set of possible tokens that will advance the parser into a new valid state."""
        accepts = set()
        conf_no_callbacks = copy(self.parser_state.parse_conf)
        # We don't want to call callbacks here since those might have arbitrary side effects
        # and are unnecessarily slow.
        conf_no_callbacks.callbacks = {}
        for t in self.choices():
            if t.isupper(): # is terminal?
                new_cursor = self.copy(deepcopy_values=False)
                new_cursor.parser_state.parse_conf = conf_no_callbacks
                try:
                    new_cursor.feed_token(self.lexer_thread._Token(t, ''))
                except UnexpectedToken:
                    pass
                else:
                    accepts.add(t)
        return accepts

    def resume_parse(self):
        """Resume automated parsing from the current state."""
        return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_thread.state.last_token)

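# Illustrative sketch of the interactive workflow (comments only; the Lark
# instance and its grammar are assumptions, not part of this module):
#
#     interactive = lark_parser.parse_interactive('1 + + 2')
#     try:
#         interactive.exhaust_lexer()
#     except UnexpectedToken:
#         print(interactive.accepts())                  # token types that could go next
#         interactive.feed_token(Token('NUMBER', '0'))  # patch the input by hand
#         interactive.exhaust_lexer()
#     tree = interactive.feed_eof()
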
class ImmutableInteractiveParser(InteractiveParser):
    """Same as ``InteractiveParser``, but operations create a new instance instead
    of changing it in-place.
    """

    result = None

    def __hash__(self):
        return hash((self.parser_state, self.lexer_thread))

    def feed_token(self, token):
        c = copy(self)
        c.result = InteractiveParser.feed_token(c, token)
        return c

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the parser.

        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
        cursor = self.as_mutable()
        cursor.exhaust_lexer()
        return cursor.as_immutable()

    def as_mutable(self):
        """Convert to an ``InteractiveParser``."""
        p = copy(self)
        return InteractiveParser(p.parser, p.parser_state, p.lexer_thread)

###}
122
ccxt/static_dependencies/lark/parsers/lalr_parser.py
Normal file
@@ -0,0 +1,122 @@
"""This module implements a LALR(1) Parser
"""
# Author: Erez Shinan (2017)
# Email : erezshin@gmail.com
from typing import Dict, Any, Optional
from ..lexer import Token, LexerThread
from ..utils import Serialize
from ..common import ParserConf, ParserCallbacks

from .lalr_analysis import LALR_Analyzer, IntParseTable, ParseTableBase
from .lalr_interactive_parser import InteractiveParser
from ..exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
from .lalr_parser_state import ParserState, ParseConf

###{standalone

class LALR_Parser(Serialize):
    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
        analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
        analysis.compute_lalr()
        callbacks = parser_conf.callbacks

        self._parse_table = analysis.parse_table
        self.parser_conf = parser_conf
        self.parser = _Parser(analysis.parse_table, callbacks, debug)

    @classmethod
    def deserialize(cls, data, memo, callbacks, debug=False):
        inst = cls.__new__(cls)
        inst._parse_table = IntParseTable.deserialize(data, memo)
        inst.parser = _Parser(inst._parse_table, callbacks, debug)
        return inst

    def serialize(self, memo: Any = None) -> Dict[str, Any]:
        return self._parse_table.serialize(memo)

    def parse_interactive(self, lexer: LexerThread, start: str):
        return self.parser.parse(lexer, start, start_interactive=True)

    def parse(self, lexer, start, on_error=None):
        try:
            return self.parser.parse(lexer, start)
        except UnexpectedInput as e:
            if on_error is None:
                raise

            while True:
                if isinstance(e, UnexpectedCharacters):
                    s = e.interactive_parser.lexer_thread.state
                    p = s.line_ctr.char_pos

                if not on_error(e):
                    raise e

                if isinstance(e, UnexpectedCharacters):
                    # If the user didn't change the character position, skip one
                    # character forward so we don't retry the same spot forever.
                    if p == s.line_ctr.char_pos:
                        s.line_ctr.feed(s.text[p:p+1])

                try:
                    return e.interactive_parser.resume_parse()
                except UnexpectedToken as e2:
                    if (isinstance(e, UnexpectedToken)
                        and e.token.type == e2.token.type == '$END'
                        and e.interactive_parser == e2.interactive_parser):
                        # Prevent infinite loop
                        raise e2
                    e = e2
                except UnexpectedCharacters as e2:
                    e = e2
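# Sketch of the on_error contract (hedged; the handler below is hypothetical):
# return True from the callback to skip past the error and resume parsing, or
# False to re-raise the original exception.
#
#     def ignore_errors(e):
#         print('recovering from', type(e).__name__)
#         return True
#
#     tree = lalr_parser.parse(lexer, 'start', on_error=ignore_errors)
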
class _Parser:
    parse_table: ParseTableBase
    callbacks: ParserCallbacks
    debug: bool

    def __init__(self, parse_table: ParseTableBase, callbacks: ParserCallbacks, debug: bool=False):
        self.parse_table = parse_table
        self.callbacks = callbacks
        self.debug = debug

    def parse(self, lexer: LexerThread, start: str, value_stack=None, state_stack=None, start_interactive=False):
        parse_conf = ParseConf(self.parse_table, self.callbacks, start)
        parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
        if start_interactive:
            return InteractiveParser(self, parser_state, parser_state.lexer)
        return self.parse_from_state(parser_state)


    def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None):
        """Run the main LALR parser loop

        Parameters:
            state - the initial state. Changed in-place.
            last_token - Used only for line information in case of an empty lexer.
        """
        try:
            token = last_token
            for token in state.lexer.lex(state):
                assert token is not None
                state.feed_token(token)

            end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
            return state.feed_token(end_token, True)
        except UnexpectedInput as e:
            try:
                e.interactive_parser = InteractiveParser(self, state, state.lexer)
            except NameError:
                pass
            raise e
        except Exception as e:
            if self.debug:
                print("")
                print("STATE STACK DUMP")
                print("----------------")
                for i, s in enumerate(state.state_stack):
                    print('%d)' % i, s)
                print("")

            raise
###}
110
ccxt/static_dependencies/lark/parsers/lalr_parser_state.py
Normal file
@@ -0,0 +1,110 @@
from copy import deepcopy, copy
from typing import Dict, Any, Generic, List
from ..lexer import Token, LexerThread
from ..common import ParserCallbacks

from .lalr_analysis import Shift, ParseTableBase, StateT
from ..exceptions import UnexpectedToken

###{standalone

class ParseConf(Generic[StateT]):
    __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'

    parse_table: ParseTableBase[StateT]
    callbacks: ParserCallbacks
    start: str

    start_state: StateT
    end_state: StateT
    states: Dict[StateT, Dict[str, tuple]]

    def __init__(self, parse_table: ParseTableBase[StateT], callbacks: ParserCallbacks, start: str):
        self.parse_table = parse_table

        self.start_state = self.parse_table.start_states[start]
        self.end_state = self.parse_table.end_states[start]
        self.states = self.parse_table.states

        self.callbacks = callbacks
        self.start = start

class ParserState(Generic[StateT]):
    __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'

    parse_conf: ParseConf[StateT]
    lexer: LexerThread
    state_stack: List[StateT]
    value_stack: list

    def __init__(self, parse_conf: ParseConf[StateT], lexer: LexerThread, state_stack=None, value_stack=None):
        self.parse_conf = parse_conf
        self.lexer = lexer
        self.state_stack = state_stack or [self.parse_conf.start_state]
        self.value_stack = value_stack or []

    @property
    def position(self) -> StateT:
        return self.state_stack[-1]

    # Necessary for match_examples() to work
    def __eq__(self, other) -> bool:
        if not isinstance(other, ParserState):
            return NotImplemented
        return len(self.state_stack) == len(other.state_stack) and self.position == other.position

    def __copy__(self):
        return self.copy()

    def copy(self, deepcopy_values=True) -> 'ParserState[StateT]':
        return type(self)(
            self.parse_conf,
            self.lexer, # XXX copy
            copy(self.state_stack),
            deepcopy(self.value_stack) if deepcopy_values else copy(self.value_stack),
        )

    def feed_token(self, token: Token, is_end=False) -> Any:
        state_stack = self.state_stack
        value_stack = self.value_stack
        states = self.parse_conf.states
        end_state = self.parse_conf.end_state
        callbacks = self.parse_conf.callbacks

        while True:
            state = state_stack[-1]
            try:
                action, arg = states[state][token.type]
            except KeyError:
                expected = {s for s in states[state].keys() if s.isupper()}
                raise UnexpectedToken(token, expected, state=self, interactive_parser=None)

            assert arg != end_state

            if action is Shift:
                # shift once and return
                assert not is_end
                state_stack.append(arg)
                value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
                return
            else:
                # reduce+shift as many times as necessary
                rule = arg
                size = len(rule.expansion)
                if size:
                    s = value_stack[-size:]
                    del state_stack[-size:]
                    del value_stack[-size:]
                else:
                    s = []

                value = callbacks[rule](s) if callbacks else s

                _action, new_state = states[state_stack[-1]][rule.origin.name]
                assert _action is Shift
                state_stack.append(new_state)
                value_stack.append(value)

                if is_end and state_stack[-1] == end_state:
                    return value_stack[-1]
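# Example of the shift/reduce contract above (comments only; the grammar and
# its parse table are hypothetical). For a table built from `start: NUMBER`:
#
#     ps = ParserState(parse_conf, lexer)
#     ps.feed_token(Token('NUMBER', '3'))                     # Shift, returns None
#     tree = ps.feed_token(Token('$END', ''), is_end=True)    # Reduce, then accept
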
###}
165
ccxt/static_dependencies/lark/parsers/xearley.py
Normal file
@@ -0,0 +1,165 @@
"""This module implements an Earley parser with a dynamic lexer

The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
    https://www.sciencedirect.com/science/article/pii/S1571066108001497

That is probably the best reference for understanding the algorithm here.

The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
is better documented here:
    http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/

Instead of running a lexer beforehand, or using a costly char-by-char method, this parser
uses regular expressions by necessity, achieving high-performance while maintaining all of
Earley's power in parsing any CFG.
"""

from typing import TYPE_CHECKING, Callable, Optional, List, Any
from collections import defaultdict

from ..tree import Tree
from ..exceptions import UnexpectedCharacters
from ..lexer import Token
from ..grammar import Terminal
from .earley import Parser as BaseParser
from .earley_forest import TokenNode

if TYPE_CHECKING:
    from ..common import LexerConf, ParserConf

class Parser(BaseParser):
    def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable,
                 resolve_ambiguity: bool=True, complete_lex: bool=False, debug: bool=False,
                 tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True):
        BaseParser.__init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity,
                            debug, tree_class, ordered_sets)
        self.ignore = [Terminal(t) for t in lexer_conf.ignore]
        self.complete_lex = complete_lex

    def _parse(self, stream, columns, to_scan, start_symbol=None):

        def scan(i, to_scan):
            """The core Earley Scanner.

            This is a custom implementation of the scanner that uses the
            Lark lexer to match tokens. The scan list is built by the
            Earley predictor, based on the previously completed tokens.
            This ensures that at each phase of the parse we have a custom
            lexer context, allowing for more complex ambiguities."""

            node_cache = {}

            # 1) Loop the expectations and ask the lexer to match.
            # Since regexp is forward looking on the input stream, and we only
            # want to process tokens when we hit the point in the stream at which
            # they complete, we push all tokens into a buffer (delayed_matches), to
            # be held possibly for a later parse step when we reach the point in the
            # input stream at which they complete.
            for item in self.Set(to_scan):
                m = match(item.expect, stream, i)
                if m:
                    t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                    delayed_matches[m.end()].append( (item, i, t) )

                    if self.complete_lex:
                        s = m.group(0)
                        for j in range(1, len(s)):
                            m = match(item.expect, s[:-j])
                            if m:
                                t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                                delayed_matches[i+m.end()].append( (item, i, t) )

                    # XXX The following 3 lines were commented out for causing a bug. See issue #768
                    # # Remove any items that successfully matched in this pass from the to_scan buffer.
                    # # This ensures we don't carry over tokens that already matched, if we're ignoring below.
                    # to_scan.remove(item)

            # 3) Process any ignores. This is typically used for e.g. whitespace.
            # We carry over any unmatched items from the to_scan buffer to be matched again after
            # the ignore. This should allow us to use ignored symbols in non-terminals to implement
            # e.g. mandatory spacing.
            for x in self.ignore:
                m = match(x, stream, i)
                if m:
                    # Carry over any items still in the scan buffer, to past the end of the ignored items.
                    delayed_matches[m.end()].extend([(item, i, None) for item in to_scan ])

                    # If we're ignoring up to the end of the file, carry over the start symbol if it already completed.
                    delayed_matches[m.end()].extend([(item, i, None) for item in columns[i] if item.is_complete and item.s == start_symbol])

            next_to_scan = self.Set()
            next_set = self.Set()
            columns.append(next_set)
            transitives.append({})

            ## 4) Process Tokens from delayed_matches.
            # This is the core of the Earley scanner. Create an SPPF node for each Token,
            # and create the symbol node in the SPPF tree. Advance the item that completed,
            # and add the resulting new item to either the Earley set (for processing by the
            # completer/predictor) or the to_scan buffer for the next parse step.
            for item, start, token in delayed_matches[i+1]:
                if token is not None:
                    token.end_line = text_line
                    token.end_column = text_column + 1
                    token.end_pos = i + 1

                    new_item = item.advance()
                    label = (new_item.s, new_item.start, i + 1)
                    token_node = TokenNode(token, terminals[token.type])
                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
                    new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
                else:
                    new_item = item

                if new_item.expect in self.TERMINALS:
                    # add (B ::= Aai+1.B, h, y) to Q'
                    next_to_scan.add(new_item)
                else:
                    # add (B ::= Aa+1.B, h, y) to Ei+1
                    next_set.add(new_item)

            del delayed_matches[i+1] # No longer needed, so unburden memory

            if not next_set and not delayed_matches and not next_to_scan:
                considered_rules = list(sorted(to_scan, key=lambda key: key.rule.origin.name))
                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan},
                                           set(to_scan), state=frozenset(i.s for i in to_scan),
                                           considered_rules=considered_rules
                                           )

            return next_to_scan


        delayed_matches = defaultdict(list)
        match = self.term_matcher
        terminals = self.lexer_conf.terminals_by_name

        # Cache for nodes & tokens created in a particular parse step.
        transitives = [{}]

        text_line = 1
        text_column = 1

        ## The main Earley loop.
        # Run the Prediction/Completion cycle for any Items in the current Earley set.
        # Completions will be added to the SPPF tree, and predictions will be recursively
        # processed down to terminals/empty nodes to be added to the scanner for the next
        # step.
        i = 0
        for token in stream:
            self.predict_and_complete(i, to_scan, columns, transitives)

            to_scan = scan(i, to_scan)

            if token == '\n':
                text_line += 1
                text_column = 1
            else:
                text_column += 1
            i += 1

        self.predict_and_complete(i, to_scan, columns, transitives)

        ## Column is now the final column in the parse.
        assert i == len(columns)-1
        return to_scan
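# Note on complete_lex (an illustrative aside, not upstream documentation):
# when enabled, the scanner above also registers every proper prefix of a
# regexp match, e.g. a match of 'abc' for a terminal also delays 'ab' and 'a',
# letting the parser consider shorter tokenizations at the cost of extra work.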