Fixed downloading and parsing of structures

Finish tomorrow?
2020-07-01 23:16:02 +02:00 · 2020-07-01 23:16:02 +02:00 · f1f540e5da
commit f1f540e5da
parent 887eb37d20
4 changed files with 23 additions and 19 deletions
--- a/build/download_structure_conversions.sh
+++ b/build/download_structure_conversions.sh
@ -9,11 +9,12 @@ if [[ -z "${API_KEY}" ]]; then
 fi

 OUT_FILE=conversions.csv
+TEMP_FILE=$(mktemp)

-rm -f $OUT_FILE
+rm -r $OUT_FILE

-for fname in glagol pridevnik prislov samostalnik; do
-  curl -s "https://gitea.cjvt.si/api/v1/repos/redmine_projects/kolokacije/contents/resources/structure_conversions/$fname.csv?token=$API_KEY" | 
-    python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d >> $OUT_FILE
-done
+curl -s "https://gitea.cjvt.si/api/v1/repos/generic/data_admin/contents/resources/structure_conversions.csv?token=$API_KEY" -o $TEMP_FILE
+echo "wc: $(wc $TEMP_FILE)"
+cat $TEMP_FILE | python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d > $OUT_FILE

+rm -rf $TEMP_FILE
--- a/src/lib/structure_conversions.py
+++ b/src/lib/structure_conversions.py
@ -1,3 +1,5 @@
+from browser import window
+
 __pragma__ ('noanno')
 __pragma__ ('js', """
 var fs = require('fs');
@ -13,7 +15,7 @@ def build_structure_conversions():
    global structure_conversions
    structure_conversions = []
    
-    structure_conversions_raw = [line.split(",") for line in conversion_csv.split("\n")]
+    structure_conversions_raw = [line.split("|") for line in conversion_csv.split("\n")]
    for line in structure_conversions_raw:
        if min(len(line[0]), len(line[1])) == 0:
            continue
@ -22,28 +24,29 @@ def build_structure_conversions():
        if line[1] == "struktura":
            continue
        
-        vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-ZčžšČŽŠ-]+)") + "$"
-        vto = line[1].replace("<struktura>", "").replace("</struktura>", "").replace("%s", "$1").strip()
+        vto_name = line[2].strip()
+        vto_id = line[4].strip()
        
-        structure_conversions.append((__new__(RegExp(vfrom, 'u')), 
-                                      vto))
+        if 0 in (len(vto_name), len(vto_id)):
+            continue
+        
+        vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-ZčžšČŽŠ-]+)") + "$"
+        structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_id))
        
        
 def convert_structure(structure):
    if structure_conversions is None:
        build_structure_conversions()
    
-    for vfrom, vto in structure_conversions:
+    for vfrom, vto_name, vto_id in structure_conversions:
        match = structure.match(vfrom)
        if match:
            # we need to remove replace alias here as we want to use javascript's one
            __pragma__('noalias', 'replace')
-            result = structure.replace(vfrom, vto).strip()
+            result = structure.replace(vfrom, vto_name).strip()
            __pragma__('alias', 'replace', "py_replace")
            
-            # they said this also needs to be done - remove "-d$" from %s match
-            if len(match) > 1 and match[1].endswith("-d"):
-                result = result.replace(match[1], match[1][:-2])
-            return result
+            return result, vto_id
    
+    window.console.log("Unknown structure: ", structure)
    return None
--- a/src/message/ske_messages.py
+++ b/src/message/ske_messages.py
@ -58,7 +58,7 @@ class SkeCollocation:
    def __init__(self, data):
        self.word = data.word
        self.frequency = data.count
-        self.structure_name = convert_structure(data.gramrel)
+        self.structure_name, self.structure_id = convert_structure(data.gramrel)
        
        self.other = {"score": data.score, "cm": data.cm}

@ -242,7 +242,7 @@ class SkeInsert(DataChgClickMessage):
        new_collocation = Example()
        new_collocation.inner = MultiwordExample()
        
-        new_collocation.inner.other_attributes["structureName"] = example.structure_name
+        new_collocation.inner.other_attributes["structure_id"] = example.structure_id
        new_collocation.inner.other_attributes["logDice"] = example.other["score"]
        new_collocation.inner.other_attributes["frequency"] = example.frequency
        new_collocation.inner.type = "collocation"
--- a/src/model/example/example.py
+++ b/src/model/example/example.py
@ -31,7 +31,7 @@ class Example(Data):
        example.edited = True
        example.inner = MultiwordExample()
        example.inner.cluster = ExampleClusters.first_empty_cluster()
-        example.inner.type = "type??"
+        example.inner.type = "grammaticalCombination"
        
        empty_component = ComponentLexeme()
        empty_component.role = "headword"