add
ccxt/static_dependencies/parsimonious/__init__.py (new normal file, 10 lines)
@@ -0,0 +1,10 @@
"""Parsimonious's public API. Import from here.

Things may move around in modules deeper than this one.

"""
from .exceptions import (ParseError, IncompleteParseError,
                         VisitationError, UndefinedLabel,
                         BadGrammar)
from .grammar import Grammar, TokenGrammar
from .nodes import NodeVisitor, VisitationError, rule
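For orientation, here is a minimal usage sketch of the public API this __init__ exposes. The import path reflects the vendored location shown in this commit; the example grammar itself is made up.

    from ccxt.static_dependencies.parsimonious import Grammar

    # Compose a tiny grammar and parse a string with its first (default) rule.
    greeting_grammar = Grammar(r'''
        greeting = salutation " " name
        salutation = "Hello" / "Hi"
        name = ~"[A-Za-z]+"
    ''')

    tree = greeting_grammar.parse('Hello world')  # returns the root Node
    print(tree.children[2].text)                  # -> 'world'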
6 binary files not shown.
ccxt/static_dependencies/parsimonious/exceptions.py (new normal file, 105 lines)
@@ -0,0 +1,105 @@
from .utils import StrAndRepr


class ParseError(StrAndRepr, Exception):
    """A call to ``Expression.parse()`` or ``match()`` didn't match."""

    def __init__(self, text, pos=-1, expr=None):
        # It would be nice to use self.args, but I don't want to pay a penalty
        # to call descriptors or have the confusion of numerical indices in
        # Expression.match_core().
        self.text = text
        self.pos = pos
        self.expr = expr

    def __str__(self):
        rule_name = ((u"'%s'" % self.expr.name) if self.expr.name else
                     str(self.expr))
        return u"Rule %s didn't match at '%s' (line %s, column %s)." % (
            rule_name,
            self.text[self.pos:self.pos + 20],
            self.line(),
            self.column())

    # TODO: Add line, col, and separated-out error message so callers can build
    # their own presentation.

    def line(self):
        """Return the 1-based line number where the expression ceased to
        match."""
        # This is a method rather than a property in case we ever wanted to
        # pass in which line endings we want to use.
        return self.text.count('\n', 0, self.pos) + 1

    def column(self):
        """Return the 1-based column where the expression ceased to match."""
        # We choose 1-based because that's what Python does with SyntaxErrors.
        try:
            return self.pos - self.text.rindex('\n', 0, self.pos)
        except ValueError:
            return self.pos + 1


class IncompleteParseError(ParseError):
    """A call to ``parse()`` matched a whole Expression but did not consume the
    entire text."""

    def __str__(self):
        return u"Rule '%s' matched in its entirety, but it didn't consume all the text. The non-matching portion of the text begins with '%s' (line %s, column %s)." % (
            self.expr.name,
            self.text[self.pos:self.pos + 20],
            self.line(),
            self.column())


class VisitationError(Exception):
    """Something went wrong while traversing a parse tree.

    This exception exists to augment an underlying exception with information
    about where in the parse tree the error occurred. Otherwise, it could be
    tiresome to figure out what went wrong; you'd have to play back the whole
    tree traversal in your head.

    """
    # TODO: Make sure this is pickleable. Probably use @property pattern. Make
    # the original exc and node available on it if they don't cause a whole
    # raft of stack frames to be retained.
    def __init__(self, exc, exc_class, node):
        """Construct.

        :arg exc: What went wrong. We wrap this and add more info.
        :arg node: The node at which the error occurred

        """
        self.original_class = exc_class
        super(VisitationError, self).__init__(
            '%s: %s\n\n'
            'Parse tree:\n'
            '%s' %
            (exc_class.__name__,
             exc,
             node.prettily(error=node)))


class BadGrammar(StrAndRepr, Exception):
    """Something was wrong with the definition of a grammar.

    Note that a ParseError might be raised instead if the error is in the
    grammar definition syntax.

    """


class UndefinedLabel(BadGrammar):
    """A rule referenced in a grammar was never defined.

    Circular references and forward references are okay, but you have to define
    stuff at some point.

    """
    def __init__(self, label):
        self.label = label

    def __str__(self):
        return u'The label "%s" was never defined.' % self.label
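A short sketch of how these exception attributes are typically consumed when a parse fails. The one-rule grammar below is hypothetical; the attributes and methods are the ones defined above.

    from ccxt.static_dependencies.parsimonious import Grammar, ParseError

    digits = Grammar('number = ~"[0-9]+"')
    try:
        digits.parse('abc')
    except ParseError as err:
        # err.text, err.pos and err.expr are filled in as matching fails;
        # line() and column() are both 1-based, as the docstrings above note.
        print(err.pos, err.line(), err.column())   # 0 1 1
        print(str(err))  # Rule 'number' didn't match at 'abc' (line 1, column 1).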
ccxt/static_dependencies/parsimonious/expressions.py (new normal file, 479 lines)
@@ -0,0 +1,479 @@
"""Subexpressions that make up a parsed grammar

These do the parsing.

"""
# TODO: Make sure all symbol refs are local--not class lookups or
# anything--for speed. And kill all the dots.

from inspect import getfullargspec, isfunction, ismethod, ismethoddescriptor
import re

from .exceptions import ParseError, IncompleteParseError
from .nodes import Node, RegexNode
from .utils import StrAndRepr

MARKER = object()


def is_callable(value):
    criteria = [isfunction, ismethod, ismethoddescriptor]
    return any([criterion(value) for criterion in criteria])


def expression(callable, rule_name, grammar):
    """Turn a plain callable into an Expression.

    The callable can be of this simple form::

        def foo(text, pos):
            '''If this custom expression matches starting at text[pos], return
            the index where it stops matching. Otherwise, return None.'''
            if the expression matched:
                return end_pos

    If there are child nodes to return, return a tuple::

        return end_pos, children

    If the expression doesn't match at the given ``pos`` at all... ::

        return None

    If your callable needs to make sub-calls to other rules in the grammar or
    do error reporting, it can take this form, gaining additional arguments::

        def foo(text, pos, cache, error, grammar):
            # Call out to other rules:
            node = grammar['another_rule'].match_core(text, pos, cache, error)
            ...
            # Return values as above.

    The return value of the callable, if an int or a tuple, will be
    automatically transmuted into a :class:`~.Node`. If it returns
    a Node-like class directly, it will be passed through unchanged.

    :arg rule_name: The rule name to attach to the resulting
        :class:`~.Expression`
    :arg grammar: The :class:`~.Grammar` this expression will be a
        part of, to make delegating to other rules possible

    """

    # Resolve unbound methods; allows grammars to use @staticmethod custom rules
    # https://stackoverflow.com/questions/41921255/staticmethod-object-is-not-callable
    if ismethoddescriptor(callable) and hasattr(callable, '__func__'):
        callable = callable.__func__

    num_args = len(getfullargspec(callable).args)
    if ismethod(callable):
        # do not count the first argument (typically 'self') for methods
        num_args -= 1
    if num_args == 2:
        is_simple = True
    elif num_args == 5:
        is_simple = False
    else:
        raise RuntimeError("Custom rule functions must take either 2 or 5 "
                           "arguments, not %s." % num_args)

    class AdHocExpression(Expression):
        def _uncached_match(self, text, pos, cache, error):
            result = (callable(text, pos) if is_simple else
                      callable(text, pos, cache, error, grammar))

            if isinstance(result, int):
                end, children = result, None
            elif isinstance(result, tuple):
                end, children = result
            else:
                # Node or None
                return result
            return Node(self, text, pos, end, children=children)

        def _as_rhs(self):
            return '{custom function "%s"}' % callable.__name__

    return AdHocExpression(name=rule_name)


class Expression(StrAndRepr):
    """A thing that can be matched against a piece of text"""

    # Slots are about twice as fast as __dict__-based attributes:
    # http://stackoverflow.com/questions/1336791/dictionary-vs-object-which-is-more-efficient-and-why

    # Top-level expressions--rules--have names. Subexpressions are named ''.
    __slots__ = ['name', 'identity_tuple']

    def __init__(self, name=''):
        self.name = name
        self.identity_tuple = (self.name, )

    def __hash__(self):
        return hash(self.identity_tuple)

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.identity_tuple == other.identity_tuple

    def __ne__(self, other):
        return not (self == other)

    def parse(self, text, pos=0):
        """Return a parse tree of ``text``.

        Raise ``ParseError`` if the expression wasn't satisfied. Raise
        ``IncompleteParseError`` if the expression was satisfied but didn't
        consume the full string.

        """
        node = self.match(text, pos=pos)
        if node.end < len(text):
            raise IncompleteParseError(text, node.end, self)
        return node

    def match(self, text, pos=0):
        """Return the parse tree matching this expression at the given
        position, not necessarily extending all the way to the end of ``text``.

        Raise ``ParseError`` if there is no match there.

        :arg pos: The index at which to start matching

        """
        error = ParseError(text)
        node = self.match_core(text, pos, {}, error)
        if node is None:
            raise error
        return node

    def match_core(self, text, pos, cache, error):
        """Internal guts of ``match()``

        This is appropriate to call only from custom rules or Expression
        subclasses.

        :arg cache: The packrat cache::

            {(oid, pos): Node tree matched by object `oid` at index `pos` ...}

        :arg error: A ParseError instance with ``text`` already filled in but
            otherwise blank. We update the error reporting info on this object
            as we go. (Sticking references on an existing instance is faster
            than allocating a new one for each expression that fails.) We
            return None rather than raising and catching ParseErrors because
            catching is slow.

        """
        # TODO: Optimize. Probably a hot spot.
        #
        # Is there a way of looking up cached stuff that's faster than hashing
        # this id-pos pair?
        #
        # If this is slow, think about the array module. It might (or might
        # not!) use more RAM, but it'll likely be faster than hashing things
        # all the time. Also, can we move all the allocs up front?
        #
        # To save space, we have lots of choices: (0) Quit caching whole Node
        # objects. Cache just what you need to reconstitute them. (1) Cache
        # only the results of entire rules, not subexpressions (probably a
        # horrible idea for rules that need to backtrack internally a lot). (2)
        # Age stuff out of the cache somehow. LRU? (3) Cuts.
        expr_id = id(self)
        node = cache.get((expr_id, pos), MARKER)  # TODO: Change to setdefault to prevent infinite recursion in left-recursive rules.
        if node is MARKER:
            node = cache[(expr_id, pos)] = self._uncached_match(text,
                                                                pos,
                                                                cache,
                                                                error)

        # Record progress for error reporting:
        if node is None and pos >= error.pos and (
                self.name or getattr(error.expr, 'name', None) is None):
            # Don't bother reporting on unnamed expressions (unless that's all
            # we've seen so far), as they're hard to track down for a human.
            # Perhaps we could include the unnamed subexpressions later as
            # auxiliary info.
            error.expr = self
            error.pos = pos

        return node

    def __str__(self):
        return u'<%s %s>' % (
            self.__class__.__name__,
            self.as_rule())

    def as_rule(self):
        """Return the left- and right-hand sides of a rule that represents me.

        Return unicode. If I have no ``name``, omit the left-hand side.

        """
        rhs = self._as_rhs().strip()
        if rhs.startswith('(') and rhs.endswith(')'):
            rhs = rhs[1:-1]

        return (u'%s = %s' % (self.name, rhs)) if self.name else rhs

    def _unicode_members(self):
        """Return an iterable of my unicode-represented children, stopping
        descent when we hit a named node so the returned value resembles the
        input rule."""
        return [(m.name or m._as_rhs()) for m in self.members]

    def _as_rhs(self):
        """Return the right-hand side of a rule that represents me.

        Implemented by subclasses.

        """
        raise NotImplementedError


class Literal(Expression):
    """A string literal

    Use these if you can; they're the fastest.

    """
    __slots__ = ['literal']

    def __init__(self, literal, name=''):
        super(Literal, self).__init__(name)
        self.literal = literal
        self.identity_tuple = (name, literal)

    def _uncached_match(self, text, pos, cache, error):
        if text.startswith(self.literal, pos):
            return Node(self, text, pos, pos + len(self.literal))

    def _as_rhs(self):
        return repr(self.literal)


class TokenMatcher(Literal):
    """An expression matching a single token of a given type

    This is for use only with TokenGrammars.

    """
    def _uncached_match(self, token_list, pos, cache, error):
        if token_list[pos].type == self.literal:
            return Node(self, token_list, pos, pos + 1)


class Regex(Expression):
    """An expression that matches what a regex does.

    Use these as much as you can and jam as much into each one as you can;
    they're fast.

    """
    __slots__ = ['re']

    def __init__(self, pattern, name='', ignore_case=False, locale=False,
                 multiline=False, dot_all=False, unicode=False, verbose=False, ascii=False):
        super(Regex, self).__init__(name)
        self.re = re.compile(pattern, (ignore_case and re.I) |
                                      (locale and re.L) |
                                      (multiline and re.M) |
                                      (dot_all and re.S) |
                                      (unicode and re.U) |
                                      (verbose and re.X) |
                                      (ascii and re.A))
        self.identity_tuple = (self.name, self.re)

    def _uncached_match(self, text, pos, cache, error):
        """Return length of match, ``None`` if no match."""
        m = self.re.match(text, pos)
        if m is not None:
            span = m.span()
            node = RegexNode(self, text, pos, pos + span[1] - span[0])
            node.match = m  # TODO: A terrible idea for cache size?
            return node

    def _regex_flags_from_bits(self, bits):
        """Return the textual equivalent of numerically encoded regex flags."""
        flags = 'ilmsuxa'
        return ''.join(flags[i - 1] if (1 << i) & bits else '' for i in range(1, len(flags) + 1))

    def _as_rhs(self):
        return '~{!r}{}'.format(self.re.pattern,
                                self._regex_flags_from_bits(self.re.flags))


class Compound(Expression):
    """An abstract expression which contains other expressions"""

    __slots__ = ['members']

    def __init__(self, *members, **kwargs):
        """``members`` is a sequence of expressions."""
        super(Compound, self).__init__(kwargs.get('name', ''))
        self.members = members

    def __hash__(self):
        # Note we leave members out of the hash computation, since compounds can get added to
        # sets, then have their members mutated. See RuleVisitor._resolve_refs.
        # Equality should still work, but we want the rules to go into the correct hash bucket.
        return hash((self.__class__, self.name))

    def __eq__(self, other):
        return (
            isinstance(other, self.__class__) and
            self.name == other.name and
            self.members == other.members)


class Sequence(Compound):
    """A series of expressions that must match contiguous, ordered pieces of
    the text

    In other words, it's a concatenation operator: each piece has to match, one
    after another.

    """
    def _uncached_match(self, text, pos, cache, error):
        new_pos = pos
        length_of_sequence = 0
        children = []
        for m in self.members:
            node = m.match_core(text, new_pos, cache, error)
            if node is None:
                return None
            children.append(node)
            length = node.end - node.start
            new_pos += length
            length_of_sequence += length
        # Hooray! We got through all the members!
        return Node(self, text, pos, pos + length_of_sequence, children)

    def _as_rhs(self):
        return u'({0})'.format(u' '.join(self._unicode_members()))


class OneOf(Compound):
    """A series of expressions, one of which must match

    Expressions are tested in order from first to last. The first to succeed
    wins.

    """
    def _uncached_match(self, text, pos, cache, error):
        for m in self.members:
            node = m.match_core(text, pos, cache, error)
            if node is not None:
                # Wrap the succeeding child in a node representing the OneOf:
                return Node(self, text, pos, node.end, children=[node])

    def _as_rhs(self):
        return u'({0})'.format(u' / '.join(self._unicode_members()))


class Lookahead(Compound):
    """An expression which consumes nothing, even if its contained expression
    succeeds"""

    # TODO: Merge this and Not for better cache hit ratios and less code.
    # Downside: pretty-printed grammars might be spelled differently than what
    # went in. That doesn't bother me.

    def _uncached_match(self, text, pos, cache, error):
        node = self.members[0].match_core(text, pos, cache, error)
        if node is not None:
            return Node(self, text, pos, pos)

    def _as_rhs(self):
        return u'&%s' % self._unicode_members()[0]


class Not(Compound):
    """An expression that succeeds only if the expression within it doesn't

    In any case, it never consumes any characters; it's a negative lookahead.

    """
    def _uncached_match(self, text, pos, cache, error):
        # FWIW, the implementation in Parsing Techniques in Figure 15.29 does
        # not bother to cache NOTs directly.
        node = self.members[0].match_core(text, pos, cache, error)
        if node is None:
            return Node(self, text, pos, pos)

    def _as_rhs(self):
        # TODO: Make sure this parenthesizes the member properly if it's an OR
        # or AND.
        return u'!%s' % self._unicode_members()[0]


# Quantifiers. None of these is strictly necessary, but they're darn handy.

class Optional(Compound):
    """An expression that succeeds whether or not the contained one does

    If the contained expression succeeds, it goes ahead and consumes what it
    consumes. Otherwise, it consumes nothing.

    """
    def _uncached_match(self, text, pos, cache, error):
        node = self.members[0].match_core(text, pos, cache, error)
        return (Node(self, text, pos, pos) if node is None else
                Node(self, text, pos, node.end, children=[node]))

    def _as_rhs(self):
        return u'%s?' % self._unicode_members()[0]


# TODO: Merge with OneOrMore.
class ZeroOrMore(Compound):
    """An expression wrapper like the * quantifier in regexes."""

    def _uncached_match(self, text, pos, cache, error):
        new_pos = pos
        children = []
        while True:
            node = self.members[0].match_core(text, new_pos, cache, error)
            if node is None or not (node.end - node.start):
                # Node was None or 0 length. 0 would otherwise loop infinitely.
                return Node(self, text, pos, new_pos, children)
            children.append(node)
            new_pos += node.end - node.start

    def _as_rhs(self):
        return u'%s*' % self._unicode_members()[0]


class OneOrMore(Compound):
    """An expression wrapper like the + quantifier in regexes.

    You can also pass in an alternate minimum to make this behave like "2 or
    more", "3 or more", etc.

    """
    __slots__ = ['min']

    # TODO: Add max. It should probably succeed if there are more than the max
    # --just not consume them.

    def __init__(self, member, name='', min=1):
        super(OneOrMore, self).__init__(member, name=name)
        self.min = min

    def _uncached_match(self, text, pos, cache, error):
        new_pos = pos
        children = []
        while True:
            node = self.members[0].match_core(text, new_pos, cache, error)
            if node is None:
                break
            children.append(node)
            length = node.end - node.start
            if length == 0:  # Don't loop infinitely.
                break
            new_pos += length
        if len(children) >= self.min:
            return Node(self, text, pos, new_pos, children)

    def _as_rhs(self):
        return u'%s+' % self._unicode_members()[0]
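For illustration, the same machinery can be driven without the grammar syntax by composing these Expression classes directly. This is a sketch; the particular rule names are made up.

    from ccxt.static_dependencies.parsimonious.expressions import (
        Literal, OneOf, Regex, Sequence)

    # Equivalent rule: bool_pair = boolean "," ws boolean
    boolean = OneOf(Literal('true'), Literal('false'), name='boolean')
    ws = Regex(r'\s*', name='ws')
    bool_pair = Sequence(boolean, Literal(','), ws, boolean, name='bool_pair')

    node = bool_pair.parse('true, false')
    print(node.end)                                  # 11 -- whole string consumed
    print([child.text for child in node.children])   # ['true', ',', ' ', 'false']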
ccxt/static_dependencies/parsimonious/grammar.py (new normal file, 487 lines)
@@ -0,0 +1,487 @@
"""A convenience which constructs expression trees from an easy-to-read syntax

Use this unless you have a compelling reason not to; it performs some
optimizations that would be tedious to do when constructing an expression tree
by hand.

"""
from collections import OrderedDict

from .exceptions import BadGrammar, UndefinedLabel
from .expressions import (Literal, Regex, Sequence, OneOf,
                          Lookahead, Optional, ZeroOrMore, OneOrMore, Not, TokenMatcher,
                          expression, is_callable)
from .nodes import NodeVisitor
from .utils import evaluate_string

class Grammar(OrderedDict):
    """A collection of rules that describe a language

    You can start parsing from the default rule by calling ``parse()``
    directly on the ``Grammar`` object::

        g = Grammar('''
                    polite_greeting = greeting ", my good " title
                    greeting = "Hi" / "Hello"
                    title = "madam" / "sir"
                    ''')
        g.parse('Hello, my good sir')

    Or start parsing from any of the other rules; you can pull them out of the
    grammar as if it were a dictionary::

        g['title'].parse('sir')

    You could also just construct a bunch of ``Expression`` objects yourself
    and stitch them together into a language, but using a ``Grammar`` has some
    important advantages:

    * Languages are much easier to define in the nice syntax it provides.
    * Circular references aren't a pain.
    * It does all kinds of whizzy space- and time-saving optimizations, like
      factoring up repeated subexpressions into a single object, which should
      increase cache hit ratio. [Is this implemented yet?]

    """
    def __init__(self, rules='', **more_rules):
        """Construct a grammar.

        :arg rules: A string of production rules, one per line.
        :arg default_rule: The name of the rule invoked when you call
            :meth:`parse()` or :meth:`match()` on the grammar. Defaults to the
            first rule. Falls back to None if there are no string-based rules
            in this grammar.
        :arg more_rules: Additional kwargs whose names are rule names and
            values are Expressions or custom-coded callables which accomplish
            things the built-in rule syntax cannot. These take precedence over
            ``rules`` in case of naming conflicts.

        """

        decorated_custom_rules = {
            k: (expression(v, k, self) if is_callable(v) else v)
            for k, v in more_rules.items()}

        exprs, first = self._expressions_from_rules(rules, decorated_custom_rules)
        super(Grammar, self).__init__(exprs.items())
        self.default_rule = first  # may be None

    def default(self, rule_name):
        """Return a new Grammar whose :term:`default rule` is ``rule_name``."""
        new = self._copy()
        new.default_rule = new[rule_name]
        return new

    def _copy(self):
        """Return a shallow copy of myself.

        Deep is unnecessary, since Expression trees are immutable. Subgrammars
        recreate all the Expressions from scratch, and AbstractGrammars have
        no Expressions.

        """
        new = Grammar.__new__(Grammar)
        super(Grammar, new).__init__(self.items())
        new.default_rule = self.default_rule
        return new

    def _expressions_from_rules(self, rules, custom_rules):
        """Return a 2-tuple: a dict of rule names pointing to their
        expressions, and then the first rule.

        It's a web of expressions, all referencing each other. Typically,
        there's a single root to the web of references, and that root is the
        starting symbol for parsing, but there's nothing saying you can't have
        multiple roots.

        :arg custom_rules: A map of rule names to custom-coded rules:
            Expressions

        """
        tree = rule_grammar.parse(rules)
        return RuleVisitor(custom_rules).visit(tree)

    def parse(self, text, pos=0):
        """Parse some text with the :term:`default rule`.

        :arg pos: The index at which to start parsing

        """
        self._check_default_rule()
        return self.default_rule.parse(text, pos=pos)

    def match(self, text, pos=0):
        """Parse some text with the :term:`default rule` but not necessarily
        all the way to the end.

        :arg pos: The index at which to start parsing

        """
        self._check_default_rule()
        return self.default_rule.match(text, pos=pos)

    def _check_default_rule(self):
        """Raise RuntimeError if there is no default rule defined."""
        if not self.default_rule:
            raise RuntimeError("Can't call parse() on a Grammar that has no "
                               "default rule. Choose a specific rule instead, "
                               "like some_grammar['some_rule'].parse(...).")

    def __str__(self):
        """Return a rule string that, when passed to the constructor, would
        reconstitute the grammar."""
        exprs = [self.default_rule] if self.default_rule else []
        exprs.extend(expr for expr in self.values() if
                     expr is not self.default_rule)
        return '\n'.join(expr.as_rule() for expr in exprs)

    def __repr__(self):
        """Return an expression that will reconstitute the grammar."""
        return "Grammar({!r})".format(str(self))


class TokenGrammar(Grammar):
    """A Grammar which takes a list of pre-lexed tokens instead of text

    This is useful if you want to do the lexing yourself, as a separate pass:
    for example, to implement indentation-based languages.

    """
    def _expressions_from_rules(self, rules, custom_rules):
        tree = rule_grammar.parse(rules)
        return TokenRuleVisitor(custom_rules).visit(tree)


class BootstrappingGrammar(Grammar):
    """The grammar used to recognize the textual rules that describe other
    grammars

    This grammar gets its start from some hard-coded Expressions and claws its
    way from there to an expression tree that describes how to parse the
    grammar description syntax.

    """
    def _expressions_from_rules(self, rule_syntax, custom_rules):
        """Return the rules for parsing the grammar definition syntax.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        """
        # Hard-code enough of the rules to parse the grammar that describes the
        # grammar description language, to bootstrap:
        comment = Regex(r'#[^\r\n]*', name='comment')
        meaninglessness = OneOf(Regex(r'\s+'), comment, name='meaninglessness')
        _ = ZeroOrMore(meaninglessness, name='_')
        equals = Sequence(Literal('='), _, name='equals')
        label = Sequence(Regex(r'[a-zA-Z_][a-zA-Z_0-9]*'), _, name='label')
        reference = Sequence(label, Not(equals), name='reference')
        quantifier = Sequence(Regex(r'[*+?]'), _, name='quantifier')
        # This pattern supports empty literals. TODO: A problem?
        spaceless_literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
                                  ignore_case=True,
                                  dot_all=True,
                                  name='spaceless_literal')
        literal = Sequence(spaceless_literal, _, name='literal')
        regex = Sequence(Literal('~'),
                         literal,
                         Regex('[ilmsuxa]*', ignore_case=True),
                         _,
                         name='regex')
        atom = OneOf(reference, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')

        term = OneOf(quantified, atom, name='term')
        not_term = Sequence(Literal('!'), term, _, name='not_term')
        term.members = (not_term,) + term.members

        sequence = Sequence(term, OneOrMore(term), name='sequence')
        or_term = Sequence(Literal('/'), _, term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        expression = OneOf(ored, sequence, term, name='expression')
        rule = Sequence(label, equals, expression, name='rule')
        rules = Sequence(_, OneOrMore(rule), name='rules')

        # Use those hard-coded rules to parse the (more extensive) rule syntax.
        # (For example, unless I start using parentheses in the rule language
        # definition itself, I should never have to hard-code expressions for
        # those above.)

        rule_tree = rules.parse(rule_syntax)

        # Turn the parse tree into a map of expressions:
        return RuleVisitor().visit(rule_tree)


# The grammar for parsing PEG grammar definitions:
# This is a nice, simple grammar. We may someday add to it, but it's a safe bet
# that the future will always be a superset of this.
rule_syntax = (r'''
    # Ignored things (represented by _) are typically hung off the end of the
    # leafmost kinds of nodes. Literals like "/" count as leaves.

    rules = _ rule*
    rule = label equals expression
    equals = "=" _
    literal = spaceless_literal _

    # So you can't spell a regex like `~"..." ilm`:
    spaceless_literal = ~"u?r?\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\""is /
                        ~"u?r?'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'"is

    expression = ored / sequence / term
    or_term = "/" _ term
    ored = term or_term+
    sequence = term term+
    not_term = "!" term _
    lookahead_term = "&" term _
    term = not_term / lookahead_term / quantified / atom
    quantified = atom quantifier
    atom = reference / literal / regex / parenthesized
    regex = "~" spaceless_literal ~"[ilmsuxa]*"i _
    parenthesized = "(" _ expression ")" _
    quantifier = ~"[*+?]" _
    reference = label !equals

    # A subsequent equal sign is the only thing that distinguishes a label
    # (which begins a new rule) from a reference (which is just a pointer to a
    # rule defined somewhere else):
    label = ~"[a-zA-Z_][a-zA-Z_0-9]*" _

    # _ = ~r"\s*(?:#[^\r\n]*)?\s*"
    _ = meaninglessness*
    meaninglessness = ~r"\s+" / comment
    comment = ~r"#[^\r\n]*"
    ''')


class LazyReference(str):
    """A lazy reference to a rule, which we resolve after grokking all the
    rules"""

    name = u''

    # Just for debugging:
    def _as_rhs(self):
        return u'<LazyReference to %s>' % self


class RuleVisitor(NodeVisitor):
    """Turns a parse tree of a grammar definition into a map of ``Expression``
    objects

    This is the magic piece that breathes life into a parsed bunch of parse
    rules, allowing them to go forth and parse other things.

    """
    quantifier_classes = {'?': Optional, '*': ZeroOrMore, '+': OneOrMore}

    visit_expression = visit_term = visit_atom = NodeVisitor.lift_child

    def __init__(self, custom_rules=None):
        """Construct.

        :arg custom_rules: A dict of {rule name: expression} holding custom
            rules which will take precedence over the others

        """
        self.custom_rules = custom_rules or {}

    def visit_parenthesized(self, node, parenthesized):
        """Treat a parenthesized subexpression as just its contents.

        Its position in the tree suffices to maintain its grouping semantics.

        """
        left_paren, _, expression, right_paren, _ = parenthesized
        return expression

    def visit_quantifier(self, node, quantifier):
        """Turn a quantifier into just its symbol-matching node."""
        symbol, _ = quantifier
        return symbol

    def visit_quantified(self, node, quantified):
        atom, quantifier = quantified
        return self.quantifier_classes[quantifier.text](atom)

    def visit_lookahead_term(self, node, lookahead_term):
        ampersand, term, _ = lookahead_term
        return Lookahead(term)

    def visit_not_term(self, node, not_term):
        exclamation, term, _ = not_term
        return Not(term)

    def visit_rule(self, node, rule):
        """Assign a name to the Expression and return it."""
        label, equals, expression = rule
        expression.name = label  # Assign a name to the expr.
        return expression

    def visit_sequence(self, node, sequence):
        """A parsed Sequence looks like [term node, OneOrMore node of
        ``another_term``s]. Flatten it out."""
        term, other_terms = sequence
        return Sequence(term, *other_terms)

    def visit_ored(self, node, ored):
        first_term, other_terms = ored
        return OneOf(first_term, *other_terms)

    def visit_or_term(self, node, or_term):
        """Return just the term from an ``or_term``.

        We already know it's going to be ored, from the containing ``ored``.

        """
        slash, _, term = or_term
        return term

    def visit_label(self, node, label):
        """Turn a label into a unicode string."""
        name, _ = label
        return name.text

    def visit_reference(self, node, reference):
        """Stick a :class:`LazyReference` in the tree as a placeholder.

        We resolve them all later.

        """
        label, not_equals = reference
        return LazyReference(label)

    def visit_regex(self, node, regex):
        """Return a ``Regex`` expression."""
        tilde, literal, flags, _ = regex
        flags = flags.text.upper()
        pattern = literal.literal  # Pull the string back out of the Literal
                                   # object.
        return Regex(pattern, ignore_case='I' in flags,
                     locale='L' in flags,
                     multiline='M' in flags,
                     dot_all='S' in flags,
                     unicode='U' in flags,
                     verbose='X' in flags,
                     ascii='A' in flags)

    def visit_spaceless_literal(self, spaceless_literal, visited_children):
        """Turn a string literal into a ``Literal`` that recognizes it."""
        return Literal(evaluate_string(spaceless_literal.text))

    def visit_literal(self, node, literal):
        """Pick just the literal out of a literal-and-junk combo."""
        spaceless_literal, _ = literal
        return spaceless_literal

    def generic_visit(self, node, visited_children):
        """Replace childbearing nodes with a list of their children; keep
        others untouched.

        For our case, if a node has children, only the children are important.
        Otherwise, keep the node around for (for example) the flags of the
        regex rule. Most of these kept-around nodes are subsequently thrown
        away by the other visitor methods.

        We can't simply hang the visited children off the original node; that
        would be disastrous if the node occurred in more than one place in the
        tree.

        """
        return visited_children or node  # should semantically be a tuple

    def _resolve_refs(self, rule_map, expr, done):
        """Return an expression with all its lazy references recursively
        resolved.

        Resolve any lazy references in the expression ``expr``, recursing into
        all subexpressions.

        :arg done: The set of Expressions that have already been or are
            currently being resolved, to ward off redundant work and prevent
            infinite recursion for circular refs

        """
        if isinstance(expr, LazyReference):
            label = str(expr)
            try:
                reffed_expr = rule_map[label]
            except KeyError:
                raise UndefinedLabel(expr)
            return self._resolve_refs(rule_map, reffed_expr, done)
        else:
            if getattr(expr, 'members', ()) and expr not in done:
                # Prevents infinite recursion for circular refs. At worst, one
                # of `expr.members` can refer back to `expr`, but it can't go
                # any farther.
                done.add(expr)
                expr.members = tuple(self._resolve_refs(rule_map, member, done)
                                     for member in expr.members)
            return expr

    def visit_rules(self, node, rules_list):
        """Collate all the rules into a map. Return (map, default rule).

        The default rule is the first one. Or, if you have more than one rule
        of that name, it's the last-occurring rule of that name. (This lets you
        override the default rule when you extend a grammar.) If there are no
        string-based rules, the default rule is None, because the custom rules,
        due to being kwarg-based, are unordered.

        """
        _, rules = rules_list

        # Map each rule's name to its Expression. Later rules of the same name
        # override earlier ones. This lets us define rules multiple times and
        # have the last declaration win, so you can extend grammars by
        # concatenation.
        rule_map = OrderedDict((expr.name, expr) for expr in rules)

        # And custom rules override string-based rules. This is the least
        # surprising choice when you compare the dict constructor:
        # dict({'x': 5}, x=6).
        rule_map.update(self.custom_rules)

        # Resolve references. This tolerates forward references.
        done = set()
        rule_map = OrderedDict((expr.name, self._resolve_refs(rule_map, expr, done))
                               for expr in rule_map.values())

        # isinstance() is a temporary hack around the fact that * rules don't
        # always get transformed into lists by NodeVisitor. We should fix that;
        # it's surprising and requires writing lame branches like this.
        return rule_map, (rule_map[rules[0].name]
                          if isinstance(rules, list) and rules else None)


class TokenRuleVisitor(RuleVisitor):
    """A visitor which builds expression trees meant to work on sequences of
    pre-lexed tokens rather than strings"""

    def visit_spaceless_literal(self, spaceless_literal, visited_children):
        """Turn a string literal into a ``TokenMatcher`` that matches
        ``Token`` objects by their ``type`` attributes."""
        return TokenMatcher(evaluate_string(spaceless_literal.text))

    def visit_regex(self, node, regex):
        tilde, literal, flags, _ = regex
        raise BadGrammar('Regexes do not make sense in TokenGrammars, since '
                         'TokenGrammars operate on pre-lexed tokens rather '
                         'than characters.')


# Bootstrap to level 1...
rule_grammar = BootstrappingGrammar(rule_syntax)
# ...and then to level 2. This establishes that the node tree of our rule
# syntax is built by the same machinery that will build trees of our users'
# grammars. And the correctness of that tree is tested, indirectly, in
# test_grammar.
rule_grammar = Grammar(rule_syntax)


# TODO: Teach Expression trees how to spit out Python representations of
# themselves. Then we can just paste that in above, and we won't have to
# bootstrap on import. Though it'll be a little less DRY. [Ah, but this is not
# so clean, because it would have to output multiple statements to get multiple
# refs to a single expression hooked up.]
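A sketch of the two extension points described in the Grammar docstring above: kwarg-based custom rules and re-pointing the default rule. The rule names and the callable below are invented for illustration; the calling conventions are the ones documented in expressions.expression().

    from ccxt.static_dependencies.parsimonious.grammar import Grammar

    def any_digit(text, pos):
        """Custom rule in the simple 2-argument form: match one ASCII digit."""
        return pos + 1 if text[pos:pos + 1].isdigit() else None

    g = Grammar(r'''
        pair = digit "-" digit
    ''', digit=any_digit)           # custom rules override string-based ones

    print(g.parse('3-7').children[0].text)     # -> '3'
    print(g.default('digit').parse('9').text)  # parse with another default rule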
ccxt/static_dependencies/parsimonious/nodes.py (new normal file, 325 lines)
@@ -0,0 +1,325 @@
"""Nodes that make up parse trees

Parsing spits out a tree of these, which you can then tell to walk itself and
spit out a useful value. Or you can walk it yourself; the structural attributes
are public.

"""
# TODO: If this is slow, think about using cElementTree or something.
from inspect import isfunction
from sys import version_info, exc_info

from .exceptions import VisitationError, UndefinedLabel


class Node(object):
    """A parse tree node

    Consider these immutable once constructed. As a side effect of a
    memory-saving strategy in the cache, multiple references to a single
    ``Node`` might be returned in a single parse tree. So, if you start
    messing with one, you'll see surprising parallel changes pop up elsewhere.

    My philosophy is that parse trees (and their nodes) should be
    representation-agnostic. That is, they shouldn't get all mixed up with what
    the final rendered form of a wiki page (or the intermediate representation
    of a programming language, or whatever) is going to be: you should be able
    to parse once and render several representations from the tree, one after
    another.

    """
    # I tried making this subclass list, but it got ugly. I had to construct
    # invalid ones and patch them up later, and there were other problems.
    __slots__ = ['expr',       # The expression that generated me
                 'full_text',  # The full text fed to the parser
                 'start',      # The position in the text where that expr started matching
                 'end',        # The position after start where the expr first didn't
                               # match. [start:end] follow Python slice conventions.
                 'children']   # List of child parse tree nodes

    def __init__(self, expr, full_text, start, end, children=None):
        self.expr = expr
        self.full_text = full_text
        self.start = start
        self.end = end
        self.children = children or []

    @property
    def expr_name(self):
        # backwards compatibility
        return self.expr.name

    def __iter__(self):
        """Support looping over my children and doing tuple unpacks on me.

        It can be very handy to unpack nodes in arg lists; see
        :class:`PegVisitor` for an example.

        """
        return iter(self.children)

    @property
    def text(self):
        """Return the text this node matched."""
        return self.full_text[self.start:self.end]

    # From here down is just stuff for testing and debugging.

    def prettily(self, error=None):
        """Return a unicode, pretty-printed representation of me.

        :arg error: The node to highlight because an error occurred there

        """
        # TODO: If a Node appears multiple times in the tree, we'll point to
        # them all. Whoops.
        def indent(text):
            return '\n'.join(('    ' + line) for line in text.splitlines())
        ret = [u'<%s%s matching "%s">%s' % (
            self.__class__.__name__,
            (' called "%s"' % self.expr_name) if self.expr_name else '',
            self.text,
            '  <-- *** We were here. ***' if error is self else '')]
        for n in self:
            ret.append(indent(n.prettily(error=error)))
        return '\n'.join(ret)

    def __str__(self):
        """Return a compact, human-readable representation of me."""
        return self.prettily()

    def __eq__(self, other):
        """Support by-value deep comparison with other nodes for testing."""
        if not isinstance(other, Node):
            return NotImplemented

        return (self.expr == other.expr and
                self.full_text == other.full_text and
                self.start == other.start and
                self.end == other.end and
                self.children == other.children)

    def __ne__(self, other):
        return not self == other

    def __repr__(self, top_level=True):
        """Return a bit of code (though not an expression) that will recreate
        me."""
        # repr() of unicode flattens everything out to ASCII, so we don't need
        # to explicitly encode things afterward.
        ret = ["s = %r" % self.full_text] if top_level else []
        ret.append("%s(%r, s, %s, %s%s)" % (
            self.__class__.__name__,
            self.expr,
            self.start,
            self.end,
            (', children=[%s]' %
             ', '.join([c.__repr__(top_level=False) for c in self.children]))
            if self.children else ''))
        return '\n'.join(ret)


class RegexNode(Node):
    """Node returned from a ``Regex`` expression

    Grants access to the ``re.Match`` object, in case you want to access
    capturing groups, etc.

    """
    __slots__ = ['match']


class RuleDecoratorMeta(type):
    def __new__(metaclass, name, bases, namespace):
        def unvisit(name):
            """Remove any leading "visit_" from a method name."""
            return name[6:] if name.startswith('visit_') else name

        methods = [v for k, v in namespace.items() if
                   hasattr(v, '_rule') and isfunction(v)]
        if methods:
            from .grammar import Grammar  # circular import dodge

            methods.sort(key=(lambda x: x.func_code.co_firstlineno)
                             if version_info[0] < 3 else
                             (lambda x: x.__code__.co_firstlineno))
            # Possible enhancement: once we get the Grammar extensibility story
            # solidified, we can have @rules *add* to the default grammar
            # rather than pave over it.
            namespace['grammar'] = Grammar(
                '\n'.join('{name} = {expr}'.format(name=unvisit(m.__name__),
                                                   expr=m._rule)
                          for m in methods))
        return super(RuleDecoratorMeta,
                     metaclass).__new__(metaclass, name, bases, namespace)


class NodeVisitor(object, metaclass=RuleDecoratorMeta):
    """A shell for writing things that turn parse trees into something useful

    Performs a depth-first traversal of an AST. Subclass this, add methods for
    each expr you care about, instantiate, and call
    ``visit(top_node_of_parse_tree)``. It'll return the useful stuff. This API
    is very similar to that of ``ast.NodeVisitor``.

    These could easily all be static methods, but that would add at least as
    much weirdness at the call site as the ``()`` for instantiation. And this
    way, we support subclasses that require state: options, for example, or a
    symbol table constructed from a programming language's AST.

    We never transform the parse tree in place, because...

    * There are likely multiple references to the same ``Node`` object in a
      parse tree, and changes to one reference would surprise you elsewhere.
    * It makes it impossible to report errors: you'd end up with the "error"
      arrow pointing someplace in a half-transformed mishmash of nodes--and
      that's assuming you're even transforming the tree into another tree.
      Heaven forbid you're making it into a string or something else.

    """

    #: The :term:`default grammar`: the one recommended for use with this
    #: visitor. If you populate this, you will be able to call
    #: :meth:`NodeVisitor.parse()` as a shortcut.
    grammar = None

    #: Classes of exceptions you actually intend to raise during visitation
    #: and which should propagate out of the visitor. These will not be
    #: wrapped in a VisitationError when they arise.
    unwrapped_exceptions = ()

    # TODO: If we need to optimize this, we can go back to putting subclasses
    # in charge of visiting children; they know when not to bother. Or we can
    # mark nodes as not descent-worthy in the grammar.
    def visit(self, node):
        """Walk a parse tree, transforming it into another representation.

        Recursively descend a parse tree, dispatching to the method named after
        the rule in the :class:`~.grammar.Grammar` that produced
        each node. If, for example, a rule was... ::

            bold = '<b>'

        ...the ``visit_bold()`` method would be called. It is your
        responsibility to subclass :class:`NodeVisitor` and implement those
        methods.

        """
        method = getattr(self, 'visit_' + node.expr_name, self.generic_visit)

        # Call that method, and show where in the tree it failed if it blows
        # up.
        try:
            return method(node, [self.visit(n) for n in node])
        except (VisitationError, UndefinedLabel):
            # Don't catch and re-wrap already-wrapped exceptions.
            raise
        except Exception as exc:
            # implementors may define exception classes that should not be
            # wrapped.
            if isinstance(exc, self.unwrapped_exceptions):
                raise
            # Catch any exception, and tack on a parse tree so it's easier to
            # see where it went wrong.
            exc_class = type(exc)
            raise VisitationError(exc, exc_class, node)

    def generic_visit(self, node, visited_children):
        """Default visitor method

        :arg node: The node we're visiting
        :arg visited_children: The results of visiting the children of that
            node, in a list

        I'm not sure there's an implementation of this that makes sense across
        all (or even most) use cases, so we leave it to subclasses to implement
        for now.

        """
        raise NotImplementedError('No visitor method was defined for this expression: %s' %
                                  node.expr.as_rule())

    # Convenience methods:

    def parse(self, text, pos=0):
        """Parse some text with this Visitor's default grammar and return the
        result of visiting it.

        ``SomeVisitor().parse('some_string')`` is a shortcut for
        ``SomeVisitor().visit(some_grammar.parse('some_string'))``.

        """
        return self._parse_or_match(text, pos, 'parse')

    def match(self, text, pos=0):
        """Parse and visit some text with this Visitor's default grammar, but
        don't insist on parsing all the way to the end.

        ``SomeVisitor().match('some_string')`` is a shortcut for
        ``SomeVisitor().visit(some_grammar.match('some_string'))``.

        """
        return self._parse_or_match(text, pos, 'match')

    # Internal convenience methods to help you write your own visitors:

    def lift_child(self, node, children):
        """Lift the sole child of ``node`` up to replace the node."""
        first_child, = children
        return first_child

    # Private methods:

    def _parse_or_match(self, text, pos, method_name):
        """Execute a parse or match on the default grammar, followed by a
        visitation.

        Raise RuntimeError if there is no default grammar specified.

        """
        if not self.grammar:
            raise RuntimeError(
                "The {cls}.{method}() shortcut won't work because {cls} was "
                "never associated with a specific " "grammar. Fill out its "
                "`grammar` attribute, and try again.".format(
                    cls=self.__class__.__name__,
                    method=method_name))
        return self.visit(getattr(self.grammar, method_name)(text, pos=pos))


def rule(rule_string):
    """Decorate a NodeVisitor ``visit_*`` method to tie a grammar rule to it.

    The following will arrange for the ``visit_digit`` method to receive the
    results of the ``~"[0-9]"`` parse rule::

        @rule('~"[0-9]"')
        def visit_digit(self, node, visited_children):
            ...

    Notice that there is no "digit = " as part of the rule; that gets inferred
    from the method name.

    In cases where there is only one kind of visitor interested in a grammar,
    using ``@rule`` saves you having to look back and forth between the visitor
    and the grammar definition.

    On an implementation level, all ``@rule`` rules get stitched together into
    a :class:`~.Grammar` that becomes the NodeVisitor's
    :term:`default grammar`.

    Typically, the choice of a default rule for this grammar is simple: whatever
    ``@rule`` comes first in the class is the default. But the choice may become
    surprising if you divide the ``@rule`` calls among subclasses. At the
    moment, which method "comes first" is decided simply by comparing line
    numbers, so whatever method is on the smallest-numbered line will be the
    default. In a future release, this will change to pick the
    first ``@rule`` call on the basemost class that has one. That way, a
    subclass which does not override the default rule's ``visit_*`` method
    won't unintentionally change which rule is the default.

    """
    def decorator(method):
        method._rule = rule_string  # XXX: Maybe register them on a class var instead so we can just override a @rule'd visitor method on a subclass without blowing away the rule string that comes with it.
        return method
    return decorator
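A sketch of the visitor pattern this module implements: subclass NodeVisitor, add one visit_* method per rule you care about, and let generic_visit pass everything else through. The grammar and method bodies below are illustrative, not part of the commit.

    from ccxt.static_dependencies.parsimonious import Grammar, NodeVisitor

    sum_grammar = Grammar(r'''
        sum = number ("+" number)*
        number = ~"[0-9]+"
    ''')

    class SumVisitor(NodeVisitor):
        grammar = sum_grammar          # enables the .parse() shortcut

        def visit_number(self, node, visited_children):
            return int(node.text)

        def visit_sum(self, node, visited_children):
            first, rest = visited_children
            # `rest` holds one [plus, number] pair per repetition of the group.
            return first + sum(number for _, number in rest)

        def generic_visit(self, node, visited_children):
            return visited_children or node

    print(SumVisitor().parse('1+2+40'))   # -> 43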
ccxt/static_dependencies/parsimonious/utils.py (new normal file, 40 lines)
@@ -0,0 +1,40 @@
"""General tools which don't depend on other parts of Parsimonious"""

import ast


class StrAndRepr(object):
    """Mix-in which gives the class the same __repr__ and __str__."""

    def __repr__(self):
        return self.__str__()


def evaluate_string(string):
    """Piggyback on Python's string support so we can have backslash escaping
    and niceties like \n, \t, etc. string.decode('string_escape') would have
    been a lower-level possibility.

    """
    return ast.literal_eval(string)


class Token(StrAndRepr):
    """A class to represent tokens, for use with TokenGrammars

    You will likely want to subclass this to hold additional information, like
    the characters that you lexed to create this token. Alternately, feel free
    to create your own class from scratch. The only contract is that tokens
    must have a ``type`` attr.

    """
    __slots__ = ['type']

    def __init__(self, type):
        self.type = type

    def __str__(self):
        return u'<Token "%s">' % (self.type,)

    def __eq__(self, other):
        return self.type == other.type
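A sketch of how Token pairs with TokenGrammar from grammar.py: string literals in the rules match the ``type`` attribute of pre-lexed tokens rather than characters. The rule names and token stream below are invented.

    from ccxt.static_dependencies.parsimonious.grammar import TokenGrammar
    from ccxt.static_dependencies.parsimonious.utils import Token

    statement = TokenGrammar('''
        statement = "if" expr "then" expr
        expr = "number"
    ''')

    # A hand-lexed token stream; only the `type` attr matters to TokenMatcher.
    tokens = [Token('if'), Token('number'), Token('then'), Token('number')]
    node = statement.parse(tokens)
    print(node.end)   # 4 -- all four tokens were consumed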