diff --git a/README.md b/README.md new file mode 100644 index 0000000..f5f6276 --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# dependency parsetree +## Linux installation and execution +### Installation +Install Python 3 on your system. + +Run the following commands in terminal: +```bash +cd path/to/project +pip3 install -r requirements.txt +``` + +### Execution +Set up search parameters in `.ini` file. + +Execute extraction by first moving to project directory with: +```bash +cd path/to/project +``` + +And later executing script with: +```bash +python3 dependency-parsetree.py --config_file=path/to/config.ini +``` + +Example: +```bash +python3 dependency-parsetree.py --config_file=config_template.ini +``` diff --git a/Tree.py b/Tree.py index daefe31..069ede5 100644 --- a/Tree.py +++ b/Tree.py @@ -1,8 +1,6 @@ import sys from copy import copy -from pyconll.unit import Token - from Result import Result from ResultNode import ResultNode from ResultTree import ResultTree @@ -61,15 +59,15 @@ class Tree(object): def set_parent(self, parent): self.parent = parent - # def fits_static_requirements_feats(self, query_tree): - # if 'feats' not in query_tree: - # return True - # - # for feat in query_tree['feats'].keys(): - # if feat not in self.feats or query_tree['feats'][feat] != next(iter(self.feats[feat].values())).get_value(): - # return False - # - # return True + def fits_static_requirements_feats(self, query_tree): + if 'feats_detailed' not in query_tree: + return True + + for feat in query_tree['feats_detailed'].keys(): + if feat not in self.feats_detailed or query_tree['feats_detailed'][feat] != next(iter(self.feats_detailed[feat].values())).get_value(): + return False + + return True def fits_permanent_requirements(self, filters): @@ -109,8 +107,8 @@ class Tree(object): ('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \ ('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \ ('feats' not in query_tree or query_tree['feats'] == self.feats.get_value()) and \
- (not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children']))) - # self.fits_static_requirements_feats(query_tree) + (not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children']))) and \ + self.fits_static_requirements_feats(query_tree) def generate_children_queries(self, all_query_indices, children): partial_results = {} diff --git a/config_template.ini b/config_template.ini new file mode 100644 index 0000000..0ff7abc --- /dev/null +++ b/config_template.ini @@ -0,0 +1,23 @@ +[settings] +input = /media/luka/Portable Disk/Datasets/dependency_treeparse/ssj500k.conllu/sl_ssj-ud_v2.4.conllu +output = results/out.tsv +internal_saves = ./internal_saves +;ngrams = 0 +;tree_size = 3 +;lines_threshold = 10000 +;frequency_threshold = 1 +association_measures = no +print_root = no +node_order = free +tree_type = all +dependency_type = unlabeled +nodes_number = yes +;label_whitelist = nsubj|obj|obl +;root_whitelist = lemma=leto +;root_whitelist = upos=NOUN&Case=Nom|upos=ADJ&Degree=Sup +;root_whitelist = lemma=mati&Case=Acc|lemma=lep&Degree=Sup +; node_type options: form, deprel, lemma, upos, xpos +;node_type = form +cpu_cores = 12 +node_type = form +query = upos=NOUN > _ diff --git a/dependency-parsetree.py b/dependency-parsetree.py index 37c99da..f1e5945 100644 --- a/dependency-parsetree.py +++ b/dependency-parsetree.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python import argparse import configparser import copy diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..25db5b5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pyconll==2.1.1 diff --git a/run.sh b/run.sh index fa55388..d5ed1c6 100755 --- a/run.sh +++ b/run.sh @@ -1,3 +1,3 @@ source venv/bin/activate -python dependency-parsetree.py --config_file="$1"
+python3 dependency-parsetree.py --config_file="$1" deactivate