patcomp.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. # Copyright 2006 Google, Inc. All Rights Reserved.
  2. # Licensed to PSF under a Contributor Agreement.
  3. """Pattern compiler.
  4. The grammar is taken from PatternGrammar.txt.
  5. The compiler compiles a pattern to a pytree.*Pattern instance.
  6. """
  7. __author__ = "Guido van Rossum <guido@python.org>"
  8. # Python imports
  9. import io
  10. # Fairly local imports
  11. from .pgen2 import driver, literals, token, tokenize, parse, grammar
  12. # Really local imports
  13. from . import pytree
  14. from . import pygram
  15. class PatternSyntaxError(Exception):
  16. pass
  17. def tokenize_wrapper(input):
  18. """Tokenizes a string suppressing significant whitespace."""
  19. skip = {token.NEWLINE, token.INDENT, token.DEDENT}
  20. tokens = tokenize.generate_tokens(io.StringIO(input).readline)
  21. for quintuple in tokens:
  22. type, value, start, end, line_text = quintuple
  23. if type not in skip:
  24. yield quintuple
  25. class PatternCompiler(object):
  26. def __init__(self, grammar_file=None):
  27. """Initializer.
  28. Takes an optional alternative filename for the pattern grammar.
  29. """
  30. if grammar_file is None:
  31. self.grammar = pygram.pattern_grammar
  32. self.syms = pygram.pattern_symbols
  33. else:
  34. self.grammar = driver.load_grammar(grammar_file)
  35. self.syms = pygram.Symbols(self.grammar)
  36. self.pygrammar = pygram.python_grammar
  37. self.pysyms = pygram.python_symbols
  38. self.driver = driver.Driver(self.grammar, convert=pattern_convert)
  39. def compile_pattern(self, input, debug=False, with_tree=False):
  40. """Compiles a pattern string to a nested pytree.*Pattern object."""
  41. tokens = tokenize_wrapper(input)
  42. try:
  43. root = self.driver.parse_tokens(tokens, debug=debug)
  44. except parse.ParseError as e:
  45. raise PatternSyntaxError(str(e)) from None
  46. if with_tree:
  47. return self.compile_node(root), root
  48. else:
  49. return self.compile_node(root)
  50. def compile_node(self, node):
  51. """Compiles a node, recursively.
  52. This is one big switch on the node type.
  53. """
  54. # XXX Optimize certain Wildcard-containing-Wildcard patterns
  55. # that can be merged
  56. if node.type == self.syms.Matcher:
  57. node = node.children[0] # Avoid unneeded recursion
  58. if node.type == self.syms.Alternatives:
  59. # Skip the odd children since they are just '|' tokens
  60. alts = [self.compile_node(ch) for ch in node.children[::2]]
  61. if len(alts) == 1:
  62. return alts[0]
  63. p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
  64. return p.optimize()
  65. if node.type == self.syms.Alternative:
  66. units = [self.compile_node(ch) for ch in node.children]
  67. if len(units) == 1:
  68. return units[0]
  69. p = pytree.WildcardPattern([units], min=1, max=1)
  70. return p.optimize()
  71. if node.type == self.syms.NegatedUnit:
  72. pattern = self.compile_basic(node.children[1:])
  73. p = pytree.NegatedPattern(pattern)
  74. return p.optimize()
  75. assert node.type == self.syms.Unit
  76. name = None
  77. nodes = node.children
  78. if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
  79. name = nodes[0].value
  80. nodes = nodes[2:]
  81. repeat = None
  82. if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
  83. repeat = nodes[-1]
  84. nodes = nodes[:-1]
  85. # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
  86. pattern = self.compile_basic(nodes, repeat)
  87. if repeat is not None:
  88. assert repeat.type == self.syms.Repeater
  89. children = repeat.children
  90. child = children[0]
  91. if child.type == token.STAR:
  92. min = 0
  93. max = pytree.HUGE
  94. elif child.type == token.PLUS:
  95. min = 1
  96. max = pytree.HUGE
  97. elif child.type == token.LBRACE:
  98. assert children[-1].type == token.RBRACE
  99. assert len(children) in (3, 5)
  100. min = max = self.get_int(children[1])
  101. if len(children) == 5:
  102. max = self.get_int(children[3])
  103. else:
  104. assert False
  105. if min != 1 or max != 1:
  106. pattern = pattern.optimize()
  107. pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
  108. if name is not None:
  109. pattern.name = name
  110. return pattern.optimize()
  111. def compile_basic(self, nodes, repeat=None):
  112. # Compile STRING | NAME [Details] | (...) | [...]
  113. assert len(nodes) >= 1
  114. node = nodes[0]
  115. if node.type == token.STRING:
  116. value = str(literals.evalString(node.value))
  117. return pytree.LeafPattern(_type_of_literal(value), value)
  118. elif node.type == token.NAME:
  119. value = node.value
  120. if value.isupper():
  121. if value not in TOKEN_MAP:
  122. raise PatternSyntaxError("Invalid token: %r" % value)
  123. if nodes[1:]:
  124. raise PatternSyntaxError("Can't have details for token")
  125. return pytree.LeafPattern(TOKEN_MAP[value])
  126. else:
  127. if value == "any":
  128. type = None
  129. elif not value.startswith("_"):
  130. type = getattr(self.pysyms, value, None)
  131. if type is None:
  132. raise PatternSyntaxError("Invalid symbol: %r" % value)
  133. if nodes[1:]: # Details present
  134. content = [self.compile_node(nodes[1].children[1])]
  135. else:
  136. content = None
  137. return pytree.NodePattern(type, content)
  138. elif node.value == "(":
  139. return self.compile_node(nodes[1])
  140. elif node.value == "[":
  141. assert repeat is None
  142. subpattern = self.compile_node(nodes[1])
  143. return pytree.WildcardPattern([[subpattern]], min=0, max=1)
  144. assert False, node
  145. def get_int(self, node):
  146. assert node.type == token.NUMBER
  147. return int(node.value)
  148. # Map named tokens to the type value for a LeafPattern
  149. TOKEN_MAP = {"NAME": token.NAME,
  150. "STRING": token.STRING,
  151. "NUMBER": token.NUMBER,
  152. "TOKEN": None}
  153. def _type_of_literal(value):
  154. if value[0].isalpha():
  155. return token.NAME
  156. elif value in grammar.opmap:
  157. return grammar.opmap[value]
  158. else:
  159. return None
  160. def pattern_convert(grammar, raw_node_info):
  161. """Converts raw node information to a Node or Leaf instance."""
  162. type, value, context, children = raw_node_info
  163. if children or type in grammar.number2symbol:
  164. return pytree.Node(type, children, context=context)
  165. else:
  166. return pytree.Leaf(type, value, context=context)
  167. def compile_pattern(pattern):
  168. return PatternCompiler().compile_pattern(pattern)