Source code for pylfg.parser_gen
# parser_gen.py
"""
This module converts an XLFG grammar and lexicon into the format required by the parser generator, and builds a parser from the converted grammar and lexicon. It defines two classes: ParserGenerator and CompiledLfgParser.
"""
import plyplus
from .parse_tree import LFGParseTree, LFGParseTreeNode, LFGParseTreeNodeF
class ParserGenerator:
    """
    Convert an LFG grammar and lexicon to the textual format handed to the
    parser generator, and build a parser from the converted text.

    Only the ``"xlfg"`` format is implemented. ``convert_grammar`` rejects
    every other format with ``ValueError``; ``convert_lexicon`` raises
    ``NotImplementedError`` for ``"xle"`` and ``ValueError`` for anything else.
    """

    def __init__(self, grammar_format):
        # Format name that selects the conversion rules ("xlfg" or "xle").
        self.grammar_format = grammar_format

    def convert_grammar(self, grammar: dict) -> str:
        """
        Convert the LFG grammar format to the format required by the parser
        generator.

        Args:
            grammar: Mapping from a left-hand-side symbol to a list of
                right-hand sides. Each right-hand side is indexed as
                ``rhs[0]`` and ``rhs[1]`` (interpolated into the rule as-is),
                ``rhs[2]`` (iterable of c-constraint strings) and ``rhs[3]``
                (iterable of f-constraint strings).

        Returns:
            One line per rule, each terminated by a newline, of the form
            ``LHS -> A B [c: ...] [f: ...]``. Empty string for an empty
            grammar.

        Raises:
            ValueError: If ``grammar_format`` is not ``"xlfg"``.
        """
        if self.grammar_format != "xlfg":
            raise ValueError(f"Unsupported grammar format for this method: {self.grammar_format}")
        rules = []
        for lhs, rhs_list in grammar.items():
            for rhs in rhs_list:
                c_constraints_str = " ".join(rhs[2])
                f_constraints_str = " ".join(rhs[3])
                rules.append(
                    f"{lhs} -> {rhs[0]} {rhs[1]} [c: {c_constraints_str}] [f: {f_constraints_str}]\n"
                )
        # Join once instead of growing a string with += inside the loops.
        return "".join(rules)

    def convert_lexicon(self, lexicon: dict) -> str:
        """
        Convert the current lexicon format to the format required by the
        parser generator.

        Args:
            lexicon: Mapping from a word to an iterable of entries; each entry
                is formatted with ``str()`` semantics via an f-string.

        Returns:
            One ``word:entry`` line per entry, each terminated by a newline.

        Raises:
            NotImplementedError: If ``grammar_format`` is ``"xle"``; that
                conversion has not been written yet.
            ValueError: If ``grammar_format`` is neither ``"xlfg"`` nor
                ``"xle"``.
        """
        if self.grammar_format == "xlfg":
            return "".join(
                f"{word}:{entry}\n"
                for word, entries in lexicon.items()
                for entry in entries
            )
        if self.grammar_format == "xle":
            # The original branch returned an undefined name and therefore
            # failed with NameError; fail explicitly until it is implemented.
            raise NotImplementedError(
                "Lexicon conversion for the 'xle' format is not implemented"
            )
        raise ValueError(f"Unsupported grammar format: {self.grammar_format}")

    def generate_parser(self, grammar: dict, lexicon: dict):
        """
        Use the parser generator to generate a parser.

        Args:
            grammar: Grammar mapping accepted by ``convert_grammar``.
            lexicon: Lexicon mapping accepted by ``convert_lexicon``.

        Returns:
            The object returned by ``plyplus.Grammar``.

        Raises:
            ValueError, NotImplementedError: Propagated from the converters.
        """
        grammar_text = self.convert_grammar(grammar)
        lexicon_text = self.convert_lexicon(lexicon)
        # NOTE(review): confirm that plyplus.Grammar accepts the lexicon as a
        # second positional argument; the call is kept exactly as written.
        return plyplus.Grammar(grammar_text, lexicon_text)
class CompiledLfgParser:
    """
    Pair a grammar and lexicon with a ParserGenerator and use the resulting
    parser to turn sentences into parse trees.

    Attributes:
        grammar_format (str): The format of the input grammar, should be "xlfg" or "xle".
        grammar (dict): The grammar in the specified format.
        lexicon (dict): The lexicon in the specified format.
        parser_generator (ParserGenerator): Generator used to build the parser.

    Methods:
        parse(sentence: str) -> list:
            Parse the sentence and return a list of LFGParseTree objects.
    """

    def __init__(self, grammar_format, grammar: dict, lexicon: dict):
        self.parser_generator = ParserGenerator(grammar_format)
        self.grammar_format = grammar_format
        self.grammar = grammar
        self.lexicon = lexicon

    def parse(self, sentence: str) -> list:
        """
        Parse ``sentence`` and return one LFGParseTree per item yielded by the
        generated parser's ``parse`` method.

        A parser is generated from the stored grammar and lexicon on every
        call.
        """
        generated = self.parser_generator.generate_parser(self.grammar, self.lexicon)
        # Wrap each raw result from the parser in an LFGParseTree.
        return [LFGParseTree(raw_tree) for raw_tree in generated.parse(sentence)]