From 19d3e38dcbee5f604ebd9b6d0a195af430e896c9 Mon Sep 17 00:00:00 2001
From: LukeItUp <luka.kavcic95@gmail.com>
Date: Thu, 3 Sep 2020 10:29:24 +0200
Subject: [PATCH] better collocation export

---
 src/lib/structure_conversions.py |  7 +--
 src/message/ske_messages.py      | 77 +++++++++++++++++++++++++-------
 2 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/src/lib/structure_conversions.py b/src/lib/structure_conversions.py
index 5852c6f..4ba8a8e 100644
--- a/src/lib/structure_conversions.py
+++ b/src/lib/structure_conversions.py
@@ -24,6 +24,7 @@ def build_structure_conversions():
         if line[1] == "struktura":
             continue
         
+        vto_structure = line[1].strip().split(">")[1].split("<")[0]
         vto_name = line[2].strip()
         vto_id = line[4].strip()
         
@@ -31,14 +32,14 @@ def build_structure_conversions():
             continue
         
         vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-ZčžšČŽŠ-]+)") + "$"
-        structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_id))
+        structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_structure, vto_id))
         
         
 def convert_structure(structure, type):
     if structure_conversions is None:
         build_structure_conversions()
     
-    for vfrom, vto_name, vto_id in structure_conversions:
+    for vfrom, vto_name, vto_structure, vto_id in structure_conversions:
         match = structure.match(vfrom)
         # fix for ids 65, 66, 67 which instead matched with 64
         if match and vto_id == '64' and '-s' in type:
@@ -57,7 +58,7 @@ def convert_structure(structure, type):
             result = structure.replace(vfrom, vto_name).strip()
             __pragma__('alias', 'replace', "py_replace")
 
-            return result, vto_id
+            return result, vto_structure, vto_id
     
     window.console.log("Unknown structure: ", structure)
     return 'N/A', '/'
diff --git a/src/message/ske_messages.py b/src/message/ske_messages.py
index f54e636..7faa743 100644
--- a/src/message/ske_messages.py
+++ b/src/message/ske_messages.py
@@ -59,7 +59,8 @@ class SkeCollocation:
     def __init__(self, data):
         self.word = data.word
         self.frequency = data.count
-        self.structure_name, self.structure_id = convert_structure(data.gramrel, data.lempos)
+        self.gramrel = data.gramrel
+        self.structure_name, self.structure, self.structure_id = convert_structure(data.gramrel, data.lempos)
         
         self.other = {"score": data.score, "cm": data.cm}
 
@@ -145,9 +146,21 @@ class SkeModal(ClickMessage):
                 # we get the data, we have to match it with available data on our gf2 examples API
                 match_gf2_examples(data, page_num, search_term, ske_index)
             elif type(data[0]) is SkeCollocation:
-                data.sort(key= lambda x: x.structure_name)
-                data = [x for x in data if x.structure_name is not "N/A"]
- 
+                # filtering, grouping and sorting data
+                data.sort(key= lambda x: float(x.other["score"]), reverse=True)
+                _data = []
+                while len(data) > 0:
+                    max_item = data.pop(0) # data is sorted by score in descending order, so the head is the current maximum
+                    _data.append(max_item)
+                    for item in data:
+                        if "N/A" in item.structure_name:
+                            data.remove(item)
+                        elif item.structure_name.strip() == max_item.structure_name.strip():
+                            _data.append(item)
+                    for delete_item in _data:
+                        if delete_item in data:
+                            data.remove(delete_item)                          
+                data = _data
         model.modal_set(lambda: modals.ske_list(
             search_term, data, page_num, model.entry.senses, model.ske.request_kinds))
     
@@ -260,17 +273,47 @@ class SkeInsert(DataChgClickMessage):
         new_collocation.inner.other_attributes["frequency"] = example.frequency
         new_collocation.inner.type = "collocation"
         
-        lex_left = ComponentLexeme()
-        lex_left.text = ""
-        lex_left.role = None
-        
-        lex_mid = ComponentLexeme()
-        lex_mid.text = example.word
-        lex_mid.role = "collocate"
-        
-        lex_right = ComponentLexeme()
-        lex_right.text = ""
-        lex_right.role = None
-        
-        new_collocation.components.extend([lex_left, lex_mid, lex_right])
+        headword = document.getElementById("ske-search").value       
+        lexemes = []
+        structure_name = example.structure_name.split("-")
+        gramrel = example.gramrel.split("_")
+        structure = example.structure.split(" ")
+        structure.append("") # HACK: pad with an empty entry so structure[i] stays defined in the loop below; needed for structure l-gg-ggn
+
+        for i in range(len(structure_name)):
+            console.log("current:", i, "\ngramrel:", gramrel[i], "\nstructure:", structure[i], "\nstruc_name:", structure_name[i])
+
+            lex = ComponentLexeme()
+            structure[i] = structure[i].replace("Inf-", "")
+            
+            # handle negation: strip the "Neg-" marker and emit a separate "ne" lexeme ahead of this component
+            if "Neg-" in structure[i]:
+                structure[i] = structure[i].replace("Neg-", "")
+                negation_flag = True
+                n_lex = ComponentLexeme()
+                n_lex.text = "ne"
+                n_lex.role = "other"
+                lexemes.append(n_lex)
+
+            if structure[i] is "":
+                continue # skip the empty padding entry appended to structure before the loop
+            elif "Vez-gbz" in structure[i]:
+                lex.text = "je"
+                lex.role = "other"                
+            elif structure_name[i] in ["d", "vd", "zp"]:
+                lex.text = gramrel[i]
+                lex.text = lex.text.replace("-d", "").replace("%", "")
+                lex.role = "other"
+            elif structure_name[i] is "vp":
+                lex.text = structure[i]
+                lex.role = "other"
+            elif structure[i][0] in ["S", "G", "P", "R"]:
+                lex.text = headword
+                lex.role = "headword"
+            else:
+                lex.text = example.word
+                lex.role = "collocate"
+            lexemes.append(lex)
+
+        new_collocation.components.extend(lexemes)
         return new_collocation