From f1f540e5da8d241904cebd738c69a14870d804e5 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Wed, 1 Jul 2020 23:16:02 +0200 Subject: [PATCH] Fixed downloading and parsing of structures Finish tomorrow? --- build/download_structure_conversions.sh | 11 ++++++----- src/lib/structure_conversions.py | 25 ++++++++++++++----------- src/message/ske_messages.py | 4 ++-- src/model/example/example.py | 2 +- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/build/download_structure_conversions.sh b/build/download_structure_conversions.sh index 3716385..e313eb9 100755 --- a/build/download_structure_conversions.sh +++ b/build/download_structure_conversions.sh @@ -9,11 +9,12 @@ if [[ -z "${API_KEY}" ]]; then fi OUT_FILE=conversions.csv +TEMP_FILE=$(mktemp) -rm -f $OUT_FILE +rm -r $OUT_FILE -for fname in glagol pridevnik prislov samostalnik; do - curl -s "https://gitea.cjvt.si/api/v1/repos/redmine_projects/kolokacije/contents/resources/structure_conversions/$fname.csv?token=$API_KEY" | - python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d >> $OUT_FILE -done +curl -s "https://gitea.cjvt.si/api/v1/repos/generic/data_admin/contents/resources/structure_conversions.csv?token=$API_KEY" -o $TEMP_FILE +echo "wc: $(wc $TEMP_FILE)" +cat $TEMP_FILE | python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d > $OUT_FILE +rm -rf $TEMP_FILE diff --git a/src/lib/structure_conversions.py b/src/lib/structure_conversions.py index 0e7af0d..5e408fc 100644 --- a/src/lib/structure_conversions.py +++ b/src/lib/structure_conversions.py @@ -1,3 +1,5 @@ +from browser import window + __pragma__ ('noanno') __pragma__ ('js', """ var fs = require('fs'); @@ -13,7 +15,7 @@ def build_structure_conversions(): global structure_conversions structure_conversions = [] - structure_conversions_raw = [line.split(",") for line in conversion_csv.split("\n")] + structure_conversions_raw = [line.split("|") for line in conversion_csv.split("\n")] for 
line in structure_conversions_raw: if min(len(line[0]), len(line[1])) == 0: continue @@ -22,28 +24,29 @@ def build_structure_conversions(): if line[1] == "struktura": continue - vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$" - vto = line[1].replace("", "").replace("", "").replace("%s", "$1").strip() + vto_name = line[2].strip() + vto_id = line[4].strip() - structure_conversions.append((__new__(RegExp(vfrom, 'u')), - vto)) + if 0 in (len(vto_name), len(vto_id)): + continue + + vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$" + structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_id)) def convert_structure(structure): if structure_conversions is None: build_structure_conversions() - for vfrom, vto in structure_conversions: + for vfrom, vto_name, vto_id in structure_conversions: match = structure.match(vfrom) if match: # we need to remove replace alias here as we want to use javascript's one __pragma__('noalias', 'replace') - result = structure.replace(vfrom, vto).strip() + result = structure.replace(vfrom, vto_name).strip() __pragma__('alias', 'replace', "py_replace") - # they said this also needs to be done - remove "-d$" from %s match - if len(match) > 1 and match[1].endswith("-d"): - result = result.replace(match[1], match[1][:-2]) - return result + return result, vto_id + window.console.log("Unknown structure: ", structure) return None diff --git a/src/message/ske_messages.py b/src/message/ske_messages.py index dd77d91..6900cdf 100644 --- a/src/message/ske_messages.py +++ b/src/message/ske_messages.py @@ -58,7 +58,7 @@ class SkeCollocation: def __init__(self, data): self.word = data.word self.frequency = data.count - self.structure_name = convert_structure(data.gramrel) + self.structure_name, self.structure_id = convert_structure(data.gramrel) self.other = {"score": data.score, "cm": data.cm} @@ -242,7 +242,7 @@ class SkeInsert(DataChgClickMessage): new_collocation = Example() 
new_collocation.inner = MultiwordExample() - new_collocation.inner.other_attributes["structureName"] = example.structure_name + new_collocation.inner.other_attributes["structure_id"] = example.structure_id new_collocation.inner.other_attributes["logDice"] = example.other["score"] new_collocation.inner.other_attributes["frequency"] = example.frequency new_collocation.inner.type = "collocation" diff --git a/src/model/example/example.py b/src/model/example/example.py index 8585d9a..e33df8f 100644 --- a/src/model/example/example.py +++ b/src/model/example/example.py @@ -31,7 +31,7 @@ class Example(Data): example.edited = True example.inner = MultiwordExample() example.inner.cluster = ExampleClusters.first_empty_cluster() - example.inner.type = "type??" + example.inner.type = "grammaticalCombination" empty_component = ComponentLexeme() empty_component.role = "headword"