Source code for pylfg.xlfg

"""
This module provides functions for parsing and manipulating phrase structure rules and lexicon entries written in the eXtended Lexical Functional Grammar (XLFG) formalism, and for checking and applying their constraints on parse trees.
"""

import re
from typing import *

from .parse_tree import *

def parse_rule(rule: str) -> Tuple[str, List[str], List[str]]:
    """
    Split an XLFG phrase structure rule into its left-hand side, its
    right-hand side symbols and its brace-delimited constraints.

    :param rule: the string representation of a XLFG phrase structure rule,
        e.g. ``"S → NP VP {c1} {c2}"``
    :return: a 3-tuple ``(lhs, rhs, constraints)``: the left-hand symbol, the
        list of right-hand symbols, and the list of ``{...}`` groups (braces
        included) in the order they appear in the rule
    :raises ValueError: if the rule, once its constraints are removed, does
        not contain exactly one "→"
    """
    constraint_pattern = r"\{.*?\}"
    c_structure_constraints = re.findall(constraint_pattern, rule)

    # Remove each brace group individually.  Replacing the space-joined list
    # of groups only works when the groups are adjacent and separated by
    # exactly one space; any other layout leaked them into the RHS.
    skeleton = re.sub(constraint_pattern, " ", rule)

    sides = skeleton.split("→")
    if len(sides) != 2:
        raise ValueError(f"expected exactly one '→' in rule: {rule!r}")
    lhs, rhs = sides

    # str.split() with no argument already discards surrounding whitespace.
    return lhs.strip(), rhs.split(), c_structure_constraints
def parse_lexicon_entry(lexicon_entry: str) -> dict:
    """
    Read the ``label=value`` pairs of an XLFG lexicon entry into a dictionary.

    Square brackets and semicolons are discarded, the remaining text is split
    on whitespace, and every token that contains exactly one ``=`` and has a
    non-empty label becomes one dictionary item.  Any other token is ignored.

    :param lexicon_entry: the string representation of a XLFG lexicon entry,
        e.g. ``"[PRED=love NUM=sg];"``
    :return: a dictionary mapping each functional label to its value (both
        strings)
    """
    cleaned = lexicon_entry.replace("[", "").replace("]", "").replace(";", "")

    # Collapse whitespace runs, then glue "label = value" into "label=value".
    # Without this a free-standing "=" token was stored as an empty label
    # mapped to an empty value and the real pair was lost.
    normalised = " ".join(cleaned.split()).replace(" = ", "=")

    functional_labels = {}
    for token in normalised.split():
        if token.count("=") != 1:
            continue
        label, _, value = token.partition("=")
        if label:
            functional_labels[label] = value
    return functional_labels
def match_constraints(rule: str, lexicon_entry: dict) -> bool:
    """
    Check whether the brace-delimited constraints of a rule are satisfied by
    a lexicon entry.

    The previous body extracted the constraint text and then fell off the end
    of the function, returning ``None`` whenever a rule had constraints; the
    comparison itself is implemented here.

    NOTE(review): constraints are assumed to be written as ``label=value``
    pairs separated by whitespace, commas or semicolons, i.e. the notation
    ``parse_lexicon_entry`` reads.  Tokens that are not of that form are
    ignored.  Confirm against the grammar files actually in use.

    :param rule: the rule to check constraints against
    :param lexicon_entry: dictionary of functional labels and their (string)
        values to check against the rule's constraints
    :return: True if the rule has no constraints or every ``label=value``
        constraint equals the entry's value for that label, False otherwise
    """
    for group in re.findall(r"\{(.*?)\}", rule):
        # Treat list punctuation as whitespace and glue "label = value"
        # into a single "label=value" token.
        flattened = group.replace(",", " ").replace(";", " ")
        normalised = " ".join(flattened.split()).replace(" = ", "=")

        for token in normalised.split():
            if token.count("=") != 1:
                continue
            label, _, expected = token.partition("=")
            if not label:
                continue
            if lexicon_entry.get(label) != expected:
                return False

    # No constraints at all, or none of them was violated.
    return True
def match_c_constraints(rule, tokens, i):
    """
    Check whether the token at index ``i`` satisfies every c-structure
    constraint of a rule.

    Each constraint is used as a regular expression and searched for in the
    token with ``re.search``.

    :param rule: an object exposing a ``c_structure_constraints`` iterable of
        regular-expression patterns
    :param tokens: a list of tokens (strings)
    :param i: the index of the token to check against the rule's c-structure
        constraints
    :return: True if the rule has no constraints or every pattern is found in
        ``tokens[i]``; False if any pattern is missing or if ``i`` is outside
        ``tokens`` while there are constraints to check
    """
    c_structure_constraints = rule.c_structure_constraints
    if not c_structure_constraints:
        # Nothing to check, so the token list is never indexed.
        return True

    try:
        token = tokens[i]
    except IndexError:
        # There is no token at this position, so it cannot satisfy anything.
        return False

    return all(re.search(pattern, token) for pattern in c_structure_constraints)
def match_f_constraints(rule, lexicon_entry):
    """
    Check a lexicon entry against the f-structure constraints of a rule.

    The constraints are read from ``rule[2]``; each one is asked, through its
    ``is_valid`` method, whether it accepts the lexicon entry.  Checking stops
    at the first constraint that rejects the entry.

    :param rule: an indexable rule whose third element is an iterable of
        constraint objects
    :param lexicon_entry: the lexicon entry handed to every constraint's
        ``is_valid`` method
    :return: True when no constraint rejects the entry (including when there
        are no constraints), False otherwise
    """
    return all(check.is_valid(lexicon_entry) for check in rule[2])
def impose_constraints_in_tree(tree: LFGParseTreeNodeF, constraints: dict):
    """
    Stamp a set of functional labels onto a parse tree node and onto every
    ``LFGParseTreeNodeF`` reachable from it through ``children``.

    :param tree: the root node of the (sub)tree to annotate
    :param constraints: dictionary mapping each functional label to the value
        passed to ``add_functional_label``
    """
    # Annotate this node first, one label at a time.
    for pair in constraints.items():
        tree.add_functional_label(*pair)

    # Then push the same labels down; children of any other type are skipped.
    f_children = (kid for kid in tree.children if isinstance(kid, LFGParseTreeNodeF))
    for kid in f_children:
        impose_constraints_in_tree(kid, constraints)
def remove_unused_constraints(node: LFGParseTreeNodeF):
    """
    Prune functional labels from a parse tree node and, recursively, from its
    ``LFGParseTreeNodeF`` children.

    A label is removed from ``node`` when none of its direct
    ``LFGParseTreeNodeF`` children carries a label of the same name; a node
    without such children therefore loses all of its labels.

    :param node: the node at which pruning starts
    """
    def f_children():
        # Children of any other type take no part in the pruning.
        return (kid for kid in node.children if isinstance(kid, LFGParseTreeNodeF))

    # Start from every label on this node and discard those a child shares.
    stale = set(node.get_all_functional_labels().keys())
    for kid in f_children():
        stale.difference_update(kid.get_all_functional_labels().keys())

    for name in stale:
        node.remove_functional_label(name)

    # Descend only after this node has been pruned.
    for kid in f_children():
        remove_unused_constraints(kid)
def _parse_lexicon_item(item: str):
    """Parse one ``word [category] [feat:val,...]`` item into ``(word, reading)``."""
    fields = item.split(None, 2)
    if len(fields) != 3:
        raise ValueError(f"malformed lexicon entry: {item!r}")
    word_raw, category_raw, f_struct_raw = fields

    word = word_raw.replace("\"", "")
    category = category_raw.strip("[").strip("]")

    f_struct = {}
    for feature in f_struct_raw.strip("[").strip("]").split(","):
        if not feature.strip():
            # An empty "[]" f-structure or a trailing comma carries no feature.
            continue
        # Cut at the first ":" only, so a value may itself contain ":".
        name, separator, value = feature.partition(":")
        if not separator:
            raise ValueError(f"feature without ':' in lexicon entry: {item!r}")
        f_struct[name.strip()] = value.strip()

    return word, {"category": category, "f_struct": f_struct}


def parse_lexicon(file):
    """
    Read a lexicon file and return its entries as a dictionary.

    Lines are handled as follows:

    * blank lines and lines starting with ``//`` are skipped;
    * a line starting with ``_`` is split at its first whitespace run and
      stored as ``entries[first_token] = rest_of_line``;
    * any other line holds one or more ``word [category] [feat:val,...]``
      items separated by ``|``; each item adds one reading to its word.

    Every reading of a word is kept, whether the readings share a line or are
    spread over several lines.  The file is read as UTF-8.

    :param file: the path of the file containing the lexicon
    :return: a dictionary mapping each word to a list of
        ``{"category": ..., "f_struct": {...}}`` dictionaries, plus one
        string-valued item per ``_`` line
    :raises ValueError: if a ``_`` line has a single token, an item does not
        have three fields, or a feature has no ``:``
    """
    entries = {}
    with open(file, encoding="utf-8") as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line or line.startswith("//"):
                continue

            if line.startswith("_"):
                entry_type, fields = line.split(None, 1)
                entries[entry_type] = fields
                continue

            # A line without "|" is simply a one-item list of alternatives.
            for item in line.split("|"):
                item = item.strip()
                if not item:
                    continue
                word, reading = _parse_lexicon_item(item)
                entries.setdefault(word, []).append(reading)
    return entries
def parse_xlfg_rule(rule: str) -> Tuple[str, List[str], Dict[str, str], Dict[str, Dict[str, str]]]:
    """
    Split an XLFG phrase structure rule into its left-hand side, right-hand
    side symbols, c-structure constraints and f-structure constraints.

    The text before the first brace is the ``LHS → RHS`` part.  The content
    of the first ``{...}`` group is handed to ``parse_c_constraints`` and the
    content of the second group to ``parse_f_constraints``; a missing group
    yields an empty dictionary.

    NOTE(review): ``parse_c_constraints`` and ``parse_f_constraints`` are not
    defined in the visible part of this module; presumably they are supplied
    by the ``.parse_tree`` star import - confirm.

    :param rule: the string representation of a XLFG phrase structure rule,
        e.g. ``"S → NP VP {c-constraints} {f-constraints}"``
    :return: a 4-tuple ``(lhs, rhs, c_constraints, f_constraints)``
    :raises ValueError: if the part before the first brace does not contain
        exactly one "→"
    """
    # Splitting on either brace puts the brace contents at the odd indices:
    #   "head {c} gap {f} tail" -> [head, c, gap, f, tail]
    # Index 2 is therefore the gap between the groups, not the second group.
    rule_parts = re.split(r'[{}]', rule)
    brace_groups = rule_parts[1::2]

    sides = rule_parts[0].strip().split("→")
    if len(sides) != 2:
        raise ValueError(f"expected exactly one '→' in rule: {rule!r}")
    lhs, rhs = sides

    c_constraints = {}
    if len(brace_groups) > 0:
        c_constraints = parse_c_constraints(brace_groups[0])

    f_constraints = {}
    if len(brace_groups) > 1:
        f_constraints = parse_f_constraints(brace_groups[1])

    # Return all four parts, matching the annotation and what parse_grammar
    # unpacks; previously only (lhs, rhs) was returned.
    return lhs.strip(), rhs.split(), c_constraints, f_constraints
def parse_grammar(file: str) -> Dict[str, List[Tuple[List[str], Dict[str, str], Dict[str, Dict[str, str]]]]]:
    """
    Read a grammar file with one XLFG phrase structure rule per line and
    group the parsed rules by their left-hand side.

    Blank lines and lines starting with ``//`` are skipped; every other line
    is parsed with ``parse_xlfg_rule``.  The file is read as UTF-8.

    :param file: the path of the file containing the grammar
    :return: a dictionary mapping each left-hand symbol to the list of its
        expansions, each a tuple ``(rhs, c_constraints, f_constraints)`` in
        file order
    """
    grammar = {}
    with open(file, encoding="utf-8") as handle:
        for raw_line in handle:
            rule = raw_line.strip()
            # Reading the whole file and splitting on "\n" always produced an
            # empty last element, which cannot be parsed as a rule.
            if not rule or rule.startswith("//"):
                continue
            rule_lhs, rule_rhs, rule_c_constraints, rule_f_constraints = parse_xlfg_rule(rule)
            grammar.setdefault(rule_lhs, []).append(
                (rule_rhs, rule_c_constraints, rule_f_constraints)
            )
    return grammar