Modified readme.md + Removed obligatory sloleks_db + Added frequency_limit and sorted parameters in recalculate_statistics.py

This commit is contained in:
2020-09-02 10:53:45 +02:00
parent 41952738ed
commit 1b0e6a27eb
6 changed files with 4841 additions and 15 deletions

View File

@@ -71,9 +71,7 @@ class WordFormAnyCR(ComponentRepresentation):
agreements_matched = [agr.match(word_msd) for agr in self.agreement]
# if any agreement does not match, look up a word form in sloleks for each mismatching agreement
if not all(agreements_matched):
if sloleks_db is None:
raise Exception('sloleks_db not properly setup!')
if sloleks_db is not None and not all(agreements_matched):
for i, agr in enumerate(self.agreement):
if not agr.match(word_msd):
msd, lemma, text = sloleks_db.get_word_form(agr.lemma, agr.msd(), agr.data, align_msd=word_msd)
@@ -142,9 +140,7 @@ class WordFormMsdCR(WordFormAnyCR):
super().add_word(word)
def _render(self, sloleks_db=None):
if len(self.words) == 0:
if sloleks_db is None:
raise Exception('sloleks_db not properly setup!')
if len(self.words) == 0 and sloleks_db is not None:
msd, lemma, text = sloleks_db.get_word_form(self.lemma, self.msd(), self.data)
if msd is not None:
self.words.append(WordDummy(msd, lemma, text))

View File

@@ -80,9 +80,13 @@ def main(args):
# figure out representations!
if args.out or args.out_no_stat:
sloleks_db = SloleksDatabase(args.sloleks_db, args.load_sloleks)
if args.sloleks_db is not None:
sloleks_db = SloleksDatabase(args.sloleks_db, args.load_sloleks)
else:
sloleks_db = None
match_store.set_representations(word_stats, structures, sloleks_db=sloleks_db)
sloleks_db.close()
if args.sloleks_db is not None:
sloleks_db.close()
Writer.make_output_writer(args, max_num_components, match_store, word_stats).write_out(
structures, match_store)
@@ -102,7 +106,7 @@ if __name__ == '__main__':
help='Structures definitions in xml file')
parser.add_argument('input',
help='input file in (gz or xml currently). If none, then just database is loaded', nargs='*')
parser.add_argument('--sloleks_db', type=str, help='Sloleks database credentials')
parser.add_argument('--sloleks_db', type=str, default=None, help='Sloleks database credentials')
parser.add_argument('--out',
help='Classic output file')
parser.add_argument('--out-no-stat',