Added possibility to filter through separate attributes of feats again + added README.md

This commit is contained in:
Luka 2019-12-18 12:57:15 +01:00
parent 9cd3758362
commit 258d52e799
6 changed files with 65 additions and 14 deletions

28
README.md Normal file
View File

@ -0,0 +1,28 @@
# dependency parsetree
## Linux installation and execution
### Installation
Install Python 3 on your system.
Run the following commands in a terminal:
```bash
cd <PATH TO PROJECT DIRECTORY>
pip3 install -r requirements.txt
```
### Execution
Set up the search parameters in an `.ini` file.
To run the extraction, first move to the project directory with:
```bash
cd <PATH TO PROJECT DIRECTORY>
```
Then execute the script with:
```bash
python3 dependency-parsetree.py --config_file=<PATH TO .ini file>
```
Example:
```bash
python3 dependency-parsetree.py --config_file=config_template.ini
```

24
Tree.py
View File

@ -1,8 +1,6 @@
import sys
from copy import copy
from pyconll.unit import Token
from Result import Result
from ResultNode import ResultNode
from ResultTree import ResultTree
@ -61,15 +59,15 @@ class Tree(object):
def set_parent(self, parent):
    """Store ``parent`` on this node as its ``parent`` attribute."""
    self.parent = parent
# def fits_static_requirements_feats(self, query_tree):
# if 'feats' not in query_tree:
# return True
#
# for feat in query_tree['feats'].keys():
# if feat not in self.feats or query_tree['feats'][feat] != next(iter(self.feats[feat].values())).get_value():
# return False
#
# return True
def fits_static_requirements_feats(self, query_tree):
    """Check the per-attribute feature constraints of a query node.

    :param query_tree: dict describing the query node. Only its optional
        ``'feats_detailed'`` entry is read here; it maps a feature name to
        the value that feature must have.
    :return: ``True`` when the query has no ``'feats_detailed'`` entry or
        every requested feature is present on this node with an equal
        value; ``False`` otherwise.
    """
    if 'feats_detailed' not in query_tree:
        return True

    for feat, expected in query_tree['feats_detailed'].items():
        if feat not in self.feats_detailed:
            return False

        # Only the first stored entry of the feature is compared.
        # NOTE(review): presumably each feature holds a single value
        # object -- confirm against where feats_detailed is built.
        first = next(iter(self.feats_detailed[feat].values()), None)

        # An empty mapping for the feature counts as "no match" rather
        # than leaking StopIteration out of this method.
        if first is None or expected != first.get_value():
            return False

    return True
def fits_permanent_requirements(self, filters):
@ -109,8 +107,8 @@ class Tree(object):
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
('feats' not in query_tree or query_tree['feats'] == self.feats.get_value()) and \
(not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children'])))
# self.fits_static_requirements_feats(query_tree)
(not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children']))) and \
self.fits_static_requirements_feats(query_tree)
def generate_children_queries(self, all_query_indices, children):
partial_results = {}

23
config_template.ini Normal file
View File

@ -0,0 +1,23 @@
[settings]
input = /media/luka/Portable Disk/Datasets/dependency_treeparse/ssj500k.conllu/sl_ssj-ud_v2.4.conllu
output = results/out.tsv
internal_saves = ./internal_saves
;ngrams = 0
;tree_size = 3
;lines_threshold = 10000
;frequency_threshold = 1
association_measures = no
print_root = no
node_order = free
tree_type = all
dependency_type = unlabeled
nodes_number = yes
;label_whitelist = nsubj|obj|obl
;root_whitelist = lemma=leto
;root_whitelist = upos=NOUN&Case=Nom|upos=ADJ&Degree=Sup
;root_whitelist = lemma=mati&Case=Acc|lemma=lep&Degree=Sup
; node_type options: form, deprel, lemma, upos, xpos
;node_type = form
cpu_cores = 12
node_type = form
query = upos=NOUN > _

View File

@ -1,3 +1,4 @@
#!/usr/bin/env python
import argparse
import configparser
import copy

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
pyconll==2.1.1

2
run.sh
View File

@ -1,3 +1,3 @@
source venv/bin/activate
python dependency-parsetree.py --config_file="$1"
python3 dependency-parsetree.py --config_file="$1"
deactivate