From 19d3e38dcbee5f604ebd9b6d0a195af430e896c9 Mon Sep 17 00:00:00 2001 From: LukeItUp Date: Thu, 3 Sep 2020 10:29:24 +0200 Subject: [PATCH] better collocation export --- src/lib/structure_conversions.py | 7 +-- src/message/ske_messages.py | 77 +++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/src/lib/structure_conversions.py b/src/lib/structure_conversions.py index 5852c6f..4ba8a8e 100644 --- a/src/lib/structure_conversions.py +++ b/src/lib/structure_conversions.py @@ -24,6 +24,7 @@ def build_structure_conversions(): if line[1] == "struktura": continue + vto_structure = line[1].strip().split(">")[1].split("<")[0] vto_name = line[2].strip() vto_id = line[4].strip() @@ -31,14 +32,14 @@ def build_structure_conversions(): continue vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$" - structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_id)) + structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_structure, vto_id)) def convert_structure(structure, type): if structure_conversions is None: build_structure_conversions() - for vfrom, vto_name, vto_id in structure_conversions: + for vfrom, vto_name, vto_structure, vto_id in structure_conversions: match = structure.match(vfrom) # fix for ids 65, 66, 67 which instead matched with 64 if match and vto_id == '64' and '-s' in type: @@ -57,7 +58,7 @@ def convert_structure(structure, type): result = structure.replace(vfrom, vto_name).strip() __pragma__('alias', 'replace', "py_replace") - return result, vto_id + return result, vto_structure, vto_id window.console.log("Unknown structure: ", structure) return 'N/A', '/' diff --git a/src/message/ske_messages.py b/src/message/ske_messages.py index f54e636..7faa743 100644 --- a/src/message/ske_messages.py +++ b/src/message/ske_messages.py @@ -59,7 +59,8 @@ class SkeCollocation: def __init__(self, data): self.word = data.word self.frequency = data.count - self.structure_name, self.structure_id = convert_structure(data.gramrel, data.lempos) + self.gramrel = data.gramrel + self.structure_name, self.structure, self.structure_id = convert_structure(data.gramrel, data.lempos) self.other = {"score": data.score, "cm": data.cm} @@ -145,9 +146,21 @@ class SkeModal(ClickMessage): # we get the data, we have to match it with available data on our gf2 examples API match_gf2_examples(data, page_num, search_term, ske_index) elif type(data[0]) is SkeCollocation: - data.sort(key= lambda x: x.structure_name) - data = [x for x in data if x.structure_name is not "N/A"] - + # filtering, grouping and sorting data + data.sort(key= lambda x: float(x.other["score"]), reverse=True) + _data = [] + while len(data) > 0: + max_item = data.pop(0) # max(data, key= lambda x: x.other["score"]) + _data.append(max_item) + for item in data: + if "N/A" in item.structure_name: + data.remove(item) + elif item.structure_name.strip() == max_item.structure_name.strip(): + _data.append(item) + for delete_item in _data: + if delete_item in data: + data.remove(delete_item) + data = _data model.modal_set(lambda: modals.ske_list( search_term, data, page_num, model.entry.senses, model.ske.request_kinds)) @@ -260,17 +273,47 @@ class SkeInsert(DataChgClickMessage): new_collocation.inner.other_attributes["frequency"] = example.frequency new_collocation.inner.type = "collocation" - lex_left = ComponentLexeme() - lex_left.text = "" - lex_left.role = None - - lex_mid = ComponentLexeme() - lex_mid.text = example.word - lex_mid.role = "collocate" - - lex_right = ComponentLexeme() - lex_right.text = "" - lex_right.role = None - - new_collocation.components.extend([lex_left, lex_mid, lex_right]) + headword = document.getElementById("ske-search").value + lexemes = [] + structure_name = example.structure_name.split("-") + gramrel = example.gramrel.split("_") + structure = example.structure.split(" ") + structure.append("") # Bad fix: we have to add something for structure l-gg-ggn + + for i in range(len(structure_name)): + console.log("current:", i, "\ngramrel:", gramrel[i], "\nstructure:", structure[i], "\nstruc_name:", structure_name[i]) + + lex = ComponentLexeme() + structure[i] = structure[i].replace("Inf-", "") + + # take care of negations "ne" + if "Neg-" in structure[i]: + structure[i] = structure[i].replace("Neg-", "") + negation_flag = True + n_lex = ComponentLexeme() + n_lex.text = "ne" + n_lex.role = "other" + lexemes.append(n_lex) + + if structure[i] is "": + continue # skipping bcs of fix + elif "Vez-gbz" in structure[i]: + lex.text = "je" + lex.role = "other" + elif structure_name[i] in ["d", "vd", "zp"]: + lex.text = gramrel[i] + lex.text = lex.text.replace("-d", "").replace("%", "") + lex.role = "other" + elif structure_name[i] is "vp": + lex.text = structure[i] + lex.role = "other" + elif structure[i][0] in ["S", "G", "P", "R"]: + lex.text = headword + lex.role = "headword" + else: + lex.text = example.word + lex.role = "collocate" + lexemes.append(lex) + + new_collocation.components.extend(lexemes) return new_collocation