Added possibility to filter through separate attributes of feats again + added README.md
This commit is contained in:
parent
9cd3758362
commit
258d52e799
28
README.md
Normal file
28
README.md
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
# dependency parsetree
|
||||||
|
## Linux installation and execution
|
||||||
|
### Installation
|
||||||
|
Install Python 3 on your system.
|
||||||
|
|
||||||
|
Run the following commands in terminal:
|
||||||
|
```bash
|
||||||
|
cd <PATH TO PROJECT DIRECTORY>
|
||||||
|
pip3 install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### Execution
|
||||||
|
Set up the search parameters in a `.ini` file.
|
||||||
|
|
||||||
|
Run the extraction by first changing to the project directory:
|
||||||
|
```bash
|
||||||
|
cd <PATH TO PROJECT DIRECTORY>
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run the script with:
|
||||||
|
```bash
|
||||||
|
python3 dependency-parsetree.py --config_file=<PATH TO .ini file>
|
||||||
|
```
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```bash
|
||||||
|
python3 dependency-parsetree.py --config_file=config_template.ini
|
||||||
|
```
|
24
Tree.py
24
Tree.py
|
@ -1,8 +1,6 @@
|
||||||
import sys
|
import sys
|
||||||
from copy import copy
|
from copy import copy
|
||||||
|
|
||||||
from pyconll.unit import Token
|
|
||||||
|
|
||||||
from Result import Result
|
from Result import Result
|
||||||
from ResultNode import ResultNode
|
from ResultNode import ResultNode
|
||||||
from ResultTree import ResultTree
|
from ResultTree import ResultTree
|
||||||
|
@ -61,15 +59,15 @@ class Tree(object):
|
||||||
def set_parent(self, parent):
|
def set_parent(self, parent):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
|
||||||
# def fits_static_requirements_feats(self, query_tree):
|
def fits_static_requirements_feats(self, query_tree):
|
||||||
# if 'feats' not in query_tree:
|
if 'feats_detailed' not in query_tree:
|
||||||
# return True
|
return True
|
||||||
#
|
|
||||||
# for feat in query_tree['feats'].keys():
|
for feat in query_tree['feats_detailed'].keys():
|
||||||
# if feat not in self.feats or query_tree['feats'][feat] != next(iter(self.feats[feat].values())).get_value():
|
if feat not in self.feats_detailed or query_tree['feats_detailed'][feat] != next(iter(self.feats_detailed[feat].values())).get_value():
|
||||||
# return False
|
return False
|
||||||
#
|
|
||||||
# return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def fits_permanent_requirements(self, filters):
|
def fits_permanent_requirements(self, filters):
|
||||||
|
@ -109,8 +107,8 @@ class Tree(object):
|
||||||
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
|
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
|
||||||
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
|
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
|
||||||
('feats' not in query_tree or query_tree['feats'] == self.feats.get_value()) and \
|
('feats' not in query_tree or query_tree['feats'] == self.feats.get_value()) and \
|
||||||
(not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children'])))
|
(not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children']))) and \
|
||||||
# self.fits_static_requirements_feats(query_tree)
|
self.fits_static_requirements_feats(query_tree)
|
||||||
|
|
||||||
def generate_children_queries(self, all_query_indices, children):
|
def generate_children_queries(self, all_query_indices, children):
|
||||||
partial_results = {}
|
partial_results = {}
|
||||||
|
|
23
config_template.ini
Normal file
23
config_template.ini
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
[settings]
|
||||||
|
input = /media/luka/Portable Disk/Datasets/dependency_treeparse/ssj500k.conllu/sl_ssj-ud_v2.4.conllu
|
||||||
|
output = results/out.tsv
|
||||||
|
internal_saves = ./internal_saves
|
||||||
|
;ngrams = 0
|
||||||
|
;tree_size = 3
|
||||||
|
;lines_threshold = 10000
|
||||||
|
;frequency_threshold = 1
|
||||||
|
association_measures = no
|
||||||
|
print_root = no
|
||||||
|
node_order = free
|
||||||
|
tree_type = all
|
||||||
|
dependency_type = unlabeled
|
||||||
|
nodes_number = yes
|
||||||
|
;label_whitelist = nsubj|obj|obl
|
||||||
|
;root_whitelist = lemma=leto
|
||||||
|
;root_whitelist = upos=NOUN&Case=Nom|upos=ADJ&Degree=Sup
|
||||||
|
;root_whitelist = lemma=mati&Case=Acc|lemma=lep&Degree=Sup
|
||||||
|
; node_type options: form, deprel, lemma, upos, xpos
|
||||||
|
;node_type = form
|
||||||
|
cpu_cores = 12
|
||||||
|
node_type = form
|
||||||
|
query = upos=NOUN > _
|
|
@ -1,3 +1,4 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
import argparse
|
import argparse
|
||||||
import configparser
|
import configparser
|
||||||
import copy
|
import copy
|
||||||
|
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
pyconll==2.1.1
|
Loading…
Reference in New Issue
Block a user