Added possibility to filter through separate attributes of feats again + added README.md

This commit is contained in:
Luka 2019-12-18 12:57:15 +01:00
parent 9cd3758362
commit 258d52e799
6 changed files with 65 additions and 14 deletions

28
README.md Normal file
View File

@ -0,0 +1,28 @@
# dependency parsetree
## Linux installation and execution
### Installation
Install Python 3 on your system.
Run the following commands in a terminal:
```bash
cd <PATH TO PROJECT DIRECTORY>
pip3 install -r requirements.txt
```
### Execution
Set up the search parameters in a `.ini` file.
To run the extraction, first move to the project directory:
```bash
cd <PATH TO PROJECT DIRECTORY>
```
Then execute the script with:
```bash
python3 dependency-parsetree.py --config_file=<PATH TO .ini file>
```
Example:
```bash
python3 dependency-parsetree.py --config_file=config_template.ini
```

24
Tree.py
View File

@ -1,8 +1,6 @@
import sys import sys
from copy import copy from copy import copy
from pyconll.unit import Token
from Result import Result from Result import Result
from ResultNode import ResultNode from ResultNode import ResultNode
from ResultTree import ResultTree from ResultTree import ResultTree
@ -61,15 +59,15 @@ class Tree(object):
def set_parent(self, parent): def set_parent(self, parent):
self.parent = parent self.parent = parent
# def fits_static_requirements_feats(self, query_tree): def fits_static_requirements_feats(self, query_tree):
# if 'feats' not in query_tree: if 'feats_detailed' not in query_tree:
# return True return True
#
# for feat in query_tree['feats'].keys(): for feat in query_tree['feats_detailed'].keys():
# if feat not in self.feats or query_tree['feats'][feat] != next(iter(self.feats[feat].values())).get_value(): if feat not in self.feats_detailed or query_tree['feats_detailed'][feat] != next(iter(self.feats_detailed[feat].values())).get_value():
# return False return False
#
# return True return True
def fits_permanent_requirements(self, filters): def fits_permanent_requirements(self, filters):
@ -109,8 +107,8 @@ class Tree(object):
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \ ('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \ ('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
('feats' not in query_tree or query_tree['feats'] == self.feats.get_value()) and \ ('feats' not in query_tree or query_tree['feats'] == self.feats.get_value()) and \
(not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children']))) (not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children']))) and \
# self.fits_static_requirements_feats(query_tree) self.fits_static_requirements_feats(query_tree)
def generate_children_queries(self, all_query_indices, children): def generate_children_queries(self, all_query_indices, children):
partial_results = {} partial_results = {}

23
config_template.ini Normal file
View File

@ -0,0 +1,23 @@
[settings]
input = /media/luka/Portable Disk/Datasets/dependency_treeparse/ssj500k.conllu/sl_ssj-ud_v2.4.conllu
output = results/out.tsv
internal_saves = ./internal_saves
;ngrams = 0
;tree_size = 3
;lines_threshold = 10000
;frequency_threshold = 1
association_measures = no
print_root = no
node_order = free
tree_type = all
dependency_type = unlabeled
nodes_number = yes
;label_whitelist = nsubj|obj|obl
;root_whitelist = lemma=leto
;root_whitelist = upos=NOUN&Case=Nom|upos=ADJ&Degree=Sup
;root_whitelist = lemma=mati&Case=Acc|lemma=lep&Degree=Sup
; node_type options: form, deprel, lemma, upos, xpos
;node_type = form
cpu_cores = 12
node_type = form
query = upos=NOUN > _

View File

@ -1,3 +1,4 @@
#!/usr/bin/env python
import argparse import argparse
import configparser import configparser
import copy import copy

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
pyconll==2.1.1

2
run.sh
View File

@ -1,3 +1,3 @@
source venv/bin/activate source venv/bin/activate
python dependency-parsetree.py --config_file="$1" python3 dependency-parsetree.py --config_file="$1"
deactivate deactivate