|
|
|
@ -1,46 +1,12 @@
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
from ast import literal_eval
|
|
|
|
|
|
|
|
|
|
from sqlalchemy.ext.declarative import declarative_base
|
|
|
|
|
from sqlalchemy.orm import Session, aliased
|
|
|
|
|
from sqlalchemy import create_engine
|
|
|
|
|
from sqlalchemy import func
|
|
|
|
|
|
|
|
|
|
from match import StructureMatch
|
|
|
|
|
from representation_assigner import RepresentationAssigner
|
|
|
|
|
from progress_bar import progress
|
|
|
|
|
|
|
|
|
|
# Lexeme = None
|
|
|
|
|
# LexemeFeature = None
|
|
|
|
|
# SyntacticStructure = None
|
|
|
|
|
# StructureComponent = None
|
|
|
|
|
# Feature = None
|
|
|
|
|
# LexicalUnitLexeme = None
|
|
|
|
|
# LexicalUnit = None
|
|
|
|
|
# LexicalUnitType = None
|
|
|
|
|
# Category = None
|
|
|
|
|
# Sense = None
|
|
|
|
|
# Measure = None
|
|
|
|
|
# LexicalUnitMeasure = None
|
|
|
|
|
# Corpus = None
|
|
|
|
|
# Definition = None
|
|
|
|
|
# WordForm = None
|
|
|
|
|
# WordFormFeature = None
|
|
|
|
|
# FormRepresentation = None
|
|
|
|
|
|
|
|
|
|
from src.codes_tagset import TAGSET, CODES, CODES_TRANSLATION
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SloleksDatabase:
|
|
|
|
|
def __init__(self, db):
|
|
|
|
|
# self.db = db
|
|
|
|
|
# self.dispersions = {}
|
|
|
|
|
# self.min_freq = args.min_freq
|
|
|
|
|
|
|
|
|
|
# self.db.init("""CREATE TABLE Colocations (
|
|
|
|
|
# colocation_id INTEGER PRIMARY KEY,
|
|
|
|
|
# structure_id varchar(8),
|
|
|
|
|
# key varchar(256))
|
|
|
|
|
# """)
|
|
|
|
|
global Lexeme, LexemeFeature, SyntacticStructure, StructureComponent, Feature, LexicalUnitLexeme, LexicalUnit, LexicalUnitType, Category, Sense, Measure, LexicalUnitMeasure, Corpus, Definition, WordForm, WordFormFeature, FormRepresentation
|
|
|
|
|
[db_user, db_password, db_database, db_host] = db.split(':')
|
|
|
|
|
|
|
|
|
@ -130,14 +96,15 @@ class SloleksDatabase:
|
|
|
|
|
def get_word_form(self, lemma, msd, data, align_msd=False):
|
|
|
|
|
# modify msd as required
|
|
|
|
|
msd = list(msd)
|
|
|
|
|
|
|
|
|
|
if not align_msd and 'msd' in data:
|
|
|
|
|
if 'msd' in data:
|
|
|
|
|
for key, value in data['msd'].items():
|
|
|
|
|
t = msd[0]
|
|
|
|
|
v = TAGSET[t].index(key.lower())
|
|
|
|
|
if v + 1 >= len(msd):
|
|
|
|
|
msd = msd + ['-' for _ in range(v - len(msd) + 2)]
|
|
|
|
|
msd[v + 1] = CODES[value]
|
|
|
|
|
|
|
|
|
|
elif 'agreement' in data:
|
|
|
|
|
if align_msd and 'agreement' in data:
|
|
|
|
|
align_msd = list(align_msd)
|
|
|
|
|
t_align_msd = align_msd[0]
|
|
|
|
|
t = msd[0]
|
|
|
|
@ -146,37 +113,24 @@ class SloleksDatabase:
|
|
|
|
|
v_align_msd = TAGSET[t_align_msd].index(att.lower())
|
|
|
|
|
v = TAGSET[t].index(att.lower())
|
|
|
|
|
# fix for verbs with short msds
|
|
|
|
|
if v >= len(msd):
|
|
|
|
|
return None, None, None
|
|
|
|
|
# if v >= len(msd) and t == 'V' and att == 'number':
|
|
|
|
|
# if len(msd) == 4:
|
|
|
|
|
# msd += ['3']
|
|
|
|
|
# if len(msd) == 5:
|
|
|
|
|
# msd += ['_']
|
|
|
|
|
# try:
|
|
|
|
|
if v + 1 >= len(msd):
|
|
|
|
|
msd = msd + ['-' for _ in range(v - len(msd) + 2)]
|
|
|
|
|
# return None, None, None
|
|
|
|
|
|
|
|
|
|
msd[v + 1] = align_msd[v_align_msd + 1]
|
|
|
|
|
# except:
|
|
|
|
|
# print('here')
|
|
|
|
|
|
|
|
|
|
# msd = list(msd)
|
|
|
|
|
decypher_msd = self.decypher_msd(msd)
|
|
|
|
|
|
|
|
|
|
if not decypher_msd:
|
|
|
|
|
return None, None, None
|
|
|
|
|
|
|
|
|
|
wfs = [aliased(WordFormFeature) for _ in decypher_msd]
|
|
|
|
|
# wf1 = aliased(WordFormFeature)
|
|
|
|
|
# wf2 = aliased(WordFormFeature)
|
|
|
|
|
# wf3 = aliased(WordFormFeature)
|
|
|
|
|
query_preposition = self.session.query(FormRepresentation.form) \
|
|
|
|
|
.join(WordForm, WordForm.id == FormRepresentation.word_form_id) \
|
|
|
|
|
.join(Lexeme, Lexeme.id == WordForm.lexeme_id)
|
|
|
|
|
|
|
|
|
|
for wf in wfs:
|
|
|
|
|
query_preposition = query_preposition.join(wf, wf.word_form_id == WordForm.id)
|
|
|
|
|
# .join(wf1, wf1.word_form_id == WordForm.id) \
|
|
|
|
|
# .join(wf2, wf2.word_form_id == WordForm.id) \
|
|
|
|
|
# .join(wf3, wf3.word_form_id == WordForm.id) \
|
|
|
|
|
|
|
|
|
|
query_preposition = query_preposition.filter(Lexeme.lemma == lemma)
|
|
|
|
|
|
|
|
|
@ -186,6 +140,4 @@ class SloleksDatabase:
|
|
|
|
|
pattern_translation_hws = query_preposition.all()
|
|
|
|
|
if len(pattern_translation_hws) > 0:
|
|
|
|
|
return ''.join(msd), lemma, pattern_translation_hws[0][0]
|
|
|
|
|
# pattern_translation_hws = [el[0] for el in query_preposition.all()]
|
|
|
|
|
return None, None, None
|
|
|
|
|
# return pattern_translation_hws
|
|
|
|
|