add
ccxt/static_dependencies/lark/parsers/grammar_analysis.py (Normal file, 203 lines added)
@@ -0,0 +1,203 @@
"Provides for superficial grammar analysis."

from collections import Counter, defaultdict
from typing import List, Dict, Iterator, FrozenSet, Set

from ..utils import bfs, fzset, classify
from ..exceptions import GrammarError
from ..grammar import Rule, Terminal, NonTerminal, Symbol
from ..common import ParserConf


class RulePtr:
    __slots__ = ('rule', 'index')
    rule: Rule
    index: int

    def __init__(self, rule: Rule, index: int):
        assert isinstance(rule, Rule)
        assert index <= len(rule.expansion)
        self.rule = rule
        self.index = index

    def __repr__(self):
        before = [x.name for x in self.rule.expansion[:self.index]]
        after = [x.name for x in self.rule.expansion[self.index:]]
        return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))

    @property
    def next(self) -> Symbol:
        return self.rule.expansion[self.index]

    def advance(self, sym: Symbol) -> 'RulePtr':
        assert self.next == sym
        return RulePtr(self.rule, self.index + 1)

    @property
    def is_satisfied(self) -> bool:
        return self.index == len(self.rule.expansion)

    def __eq__(self, other) -> bool:
        if not isinstance(other, RulePtr):
            return NotImplemented
        return self.rule == other.rule and self.index == other.index

    def __hash__(self) -> int:
        return hash((self.rule, self.index))
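
# Illustrative example (not part of the original module): a RulePtr is a
# "dotted rule". For r = Rule(NonTerminal('s'), [NonTerminal('a'), Terminal('B')]):
#     p = RulePtr(r, 0)               # repr: <s :  * a B>
#     p = p.advance(p.next)           # repr: <s : a * B>
#     p.advance(p.next).is_satisfied  # True: the dot is past the last symbol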

State = FrozenSet[RulePtr]

# state generation ensures no duplicate LR0ItemSets
class LR0ItemSet:
    __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')

    kernel: State
    closure: State
    transitions: Dict[Symbol, 'LR0ItemSet']
    lookaheads: Dict[Symbol, Set[Rule]]

    def __init__(self, kernel, closure):
        self.kernel = fzset(kernel)
        self.closure = fzset(closure)
        self.transitions = {}
        self.lookaheads = defaultdict(set)

    def __repr__(self):
        return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))
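
# Illustrative note (not part of the original module): 'kernel' holds the items
# that define this state, 'closure' holds the kernel plus the expansion of every
# nonterminal appearing right after a dot; 'transitions' and 'lookaheads' start
# empty and are filled in by the LR/LALR construction that consumes these sets.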

def update_set(set1, set2):
    if not set2 or set1 > set2:
        return False

    copy = set(set1)
    set1 |= set2
    return set1 != copy
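
# Illustrative example (not part of the original module): update_set unions
# set2 into set1 in place and reports whether set1 actually grew:
#     s = {1}
#     update_set(s, {1, 2})  # True, and s is now {1, 2}
#     update_set(s, {2})     # False: s > {2}, nothing new to add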

def calculate_sets(rules):
    """Calculate FIRST, FOLLOW, and NULLABLE sets.

    Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
    symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}

    # foreach grammar rule X ::= Y(1) ... Y(k)
    # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
    #   NULLABLE = NULLABLE union {X}
    # for i = 1 to k
    #   if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
    #     FIRST(X) = FIRST(X) union FIRST(Y(i))
    #   for j = i+1 to k
    #     if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
    #     if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
    # until none of NULLABLE,FIRST,FOLLOW changed in last iteration

    NULLABLE = set()
    FIRST = {}
    FOLLOW = {}
    for sym in symbols:
        FIRST[sym] = {sym} if sym.is_term else set()
        FOLLOW[sym] = set()

    # Calculate NULLABLE and FIRST
    changed = True
    while changed:
        changed = False

        for rule in rules:
            if set(rule.expansion) <= NULLABLE:
                if update_set(NULLABLE, {rule.origin}):
                    changed = True

            for i, sym in enumerate(rule.expansion):
                if set(rule.expansion[:i]) <= NULLABLE:
                    if update_set(FIRST[rule.origin], FIRST[sym]):
                        changed = True
                else:
                    break

    # Calculate FOLLOW
    changed = True
    while changed:
        changed = False

        for rule in rules:
            for i, sym in enumerate(rule.expansion):
                if i == len(rule.expansion) - 1 or set(rule.expansion[i+1:]) <= NULLABLE:
                    if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
                        changed = True

                for j in range(i+1, len(rule.expansion)):
                    if set(rule.expansion[i+1:j]) <= NULLABLE:
                        if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
                            changed = True

    return FIRST, FOLLOW, NULLABLE
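
# Illustrative worked example (not part of the original module), for the toy
# grammar  s -> a B  and  a -> <empty>,  with B a terminal:
#     NULLABLE == {a}   # a has an empty expansion
#     FIRST[s] == {B}   # a is nullable, so B can begin an s
#     B in FOLLOW[a]    # within s -> a B, B appears right after a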


class GrammarAnalyzer:
    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
        self.debug = debug
        self.strict = strict

        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
                      for start in parser_conf.start}

        rules = parser_conf.rules + list(root_rules.values())
        self.rules_by_origin: Dict[NonTerminal, List[Rule]] = classify(rules, lambda r: r.origin)

        if len(rules) != len(set(rules)):
            duplicates = [item for item, count in Counter(rules).items() if count > 1]
            raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))

        for r in rules:
            for sym in r.expansion:
                if not (sym.is_term or sym in self.rules_by_origin):
                    raise GrammarError("Using an undefined rule: %s" % sym)

        self.start_states = {start: self.expand_rule(root_rule.origin)
                             for start, root_rule in root_rules.items()}

        self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
                           for start, root_rule in root_rules.items()}

        lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
                          for start in parser_conf.start}

        lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
        assert len(lr0_rules) == len(set(lr0_rules))

        self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                                 for start, root_rule in lr0_root_rules.items()}

        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

    def expand_rule(self, source_rule: NonTerminal, rules_by_origin=None) -> State:
        "Returns all init_ptrs accessible by rule (recursive)"

        if rules_by_origin is None:
            rules_by_origin = self.rules_by_origin

        init_ptrs = set()
        def _expand_rule(rule: NonTerminal) -> Iterator[NonTerminal]:
            assert not rule.is_term, rule

            for r in rules_by_origin[rule]:
                init_ptr = RulePtr(r, 0)
                init_ptrs.add(init_ptr)

                if r.expansion: # if not empty rule
                    new_r = init_ptr.next
                    if not new_r.is_term:
                        assert isinstance(new_r, NonTerminal)
                        yield new_r

        for _ in bfs([source_rule], _expand_rule):
            pass

        return fzset(init_ptrs)
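
# Illustrative usage sketch (not part of the original module; assumes the
# ParserConf(rules, callbacks, start) signature from ..common imported above):
#     conf = ParserConf([Rule(NonTerminal('start'), [Terminal('X')])], {}, ['start'])
#     analyzer = GrammarAnalyzer(conf)
#     analyzer.FIRST[NonTerminal('start')]        # {Terminal('X')}
#     analyzer.expand_rule(NonTerminal('start'))  # frozenset of initial RulePtrs reachable from 'start'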