diff --git a/.gitignore b/.gitignore index 3a0f11c..6ac65c6 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ __pycache__ prev old data +venv diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..6593b89 --- /dev/null +++ b/run.sh @@ -0,0 +1 @@ +pypy3 src/wani.py data/Kolokacije_strukture_JOS-32-representation_3D_08_1.xml data/input --out data/output --sloleks_db 'superbaza:A)2U&+3Vfd$Fg]Gb:kolokacije:127.0.0.1' --collocation_sentence_map_dest data/collocation_sentence_mapper --db /mnt/tmp/mysql-wani --multiple-output diff --git a/src/loader.py b/src/loader.py index 48b25b7..cd8bd83 100644 --- a/src/loader.py +++ b/src/loader.py @@ -20,7 +20,7 @@ def load_files(args, database): do_msd_translate = not args.no_msd_translate if len(filenames) == 1 and os.path.isdir(filenames[0]): - filenames = [os.path.join(filenames[0], file) for file in os.listdir(filenames[0])] + filenames = [os.path.join(filenames[0], file) for file in os.listdir(filenames[0]) if file[-5:] != '.zstd'] database.init("CREATE TABLE Files ( filename varchar(2048) )") diff --git a/src/representation.py b/src/representation.py index c059415..2a7842a 100644 --- a/src/representation.py +++ b/src/representation.py @@ -4,7 +4,7 @@ from collections import Counter from codes_tagset import TAGSET, CODES from word import WordMsdOnly -from src.word import WordDummy +from word import WordDummy class ComponentRepresentation: diff --git a/src/sloleks_db.py b/src/sloleks_db.py index 7f3897f..e20b537 100644 --- a/src/sloleks_db.py +++ b/src/sloleks_db.py @@ -1,8 +1,11 @@ +from psycopg2cffi import compat +compat.register() + from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import Session, aliased from sqlalchemy import create_engine -from src.codes_tagset import TAGSET, CODES, CODES_TRANSLATION +from codes_tagset import TAGSET, CODES, CODES_TRANSLATION class SloleksDatabase: diff --git a/src/wani.py b/src/wani.py index de35eab..f10b5ef 100644 --- a/src/wani.py +++ b/src/wani.py @@ -21,7 +21,7 @@ from loader import load_files from database import Database from time_info import TimeInfo -from src.postprocessor import Postprocessor +from postprocessor import Postprocessor def match_file(words, structures, postprocessor):