Fixed downloading and parsing of structures

Finish tomorrow?
This commit is contained in:
Ozbolt Menegatti 2020-07-01 23:16:02 +02:00
parent 887eb37d20
commit f1f540e5da
4 changed files with 23 additions and 19 deletions

View File

@ -9,11 +9,12 @@ if [[ -z "${API_KEY}" ]]; then
fi fi
OUT_FILE=conversions.csv OUT_FILE=conversions.csv
TEMP_FILE=$(mktemp)
rm -f $OUT_FILE rm -r $OUT_FILE
for fname in glagol pridevnik prislov samostalnik; do curl -s "https://gitea.cjvt.si/api/v1/repos/generic/data_admin/contents/resources/structure_conversions.csv?token=$API_KEY" -o $TEMP_FILE
curl -s "https://gitea.cjvt.si/api/v1/repos/redmine_projects/kolokacije/contents/resources/structure_conversions/$fname.csv?token=$API_KEY" | echo "wc: $(wc $TEMP_FILE)"
python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d >> $OUT_FILE cat $TEMP_FILE | python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d > $OUT_FILE
done
rm -rf $TEMP_FILE

View File

@ -1,3 +1,5 @@
from browser import window
__pragma__ ('noanno') __pragma__ ('noanno')
__pragma__ ('js', """ __pragma__ ('js', """
var fs = require('fs'); var fs = require('fs');
@ -13,7 +15,7 @@ def build_structure_conversions():
global structure_conversions global structure_conversions
structure_conversions = [] structure_conversions = []
structure_conversions_raw = [line.split(",") for line in conversion_csv.split("\n")] structure_conversions_raw = [line.split("|") for line in conversion_csv.split("\n")]
for line in structure_conversions_raw: for line in structure_conversions_raw:
if min(len(line[0]), len(line[1])) == 0: if min(len(line[0]), len(line[1])) == 0:
continue continue
@ -22,28 +24,29 @@ def build_structure_conversions():
if line[1] == "struktura": if line[1] == "struktura":
continue continue
vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$" vto_name = line[2].strip()
vto = line[1].replace("<struktura>", "").replace("</struktura>", "").replace("%s", "$1").strip() vto_id = line[4].strip()
structure_conversions.append((__new__(RegExp(vfrom, 'u')), if 0 in (len(vto_name), len(vto_id)):
vto)) continue
vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$"
structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_id))
def convert_structure(structure): def convert_structure(structure):
if structure_conversions is None: if structure_conversions is None:
build_structure_conversions() build_structure_conversions()
for vfrom, vto in structure_conversions: for vfrom, vto_name, vto_id in structure_conversions:
match = structure.match(vfrom) match = structure.match(vfrom)
if match: if match:
# we need to remove replace alias here as we want to use javascript's one # we need to remove replace alias here as we want to use javascript's one
__pragma__('noalias', 'replace') __pragma__('noalias', 'replace')
result = structure.replace(vfrom, vto).strip() result = structure.replace(vfrom, vto_name).strip()
__pragma__('alias', 'replace', "py_replace") __pragma__('alias', 'replace', "py_replace")
# they said this also needs to be done - remove "-d$" from %s match return result, vto_id
if len(match) > 1 and match[1].endswith("-d"):
result = result.replace(match[1], match[1][:-2])
return result
window.console.log("Unknown structure: ", structure)
return None return None

View File

@ -58,7 +58,7 @@ class SkeCollocation:
def __init__(self, data): def __init__(self, data):
self.word = data.word self.word = data.word
self.frequency = data.count self.frequency = data.count
self.structure_name = convert_structure(data.gramrel) self.structure_name, self.structure_id = convert_structure(data.gramrel)
self.other = {"score": data.score, "cm": data.cm} self.other = {"score": data.score, "cm": data.cm}
@ -242,7 +242,7 @@ class SkeInsert(DataChgClickMessage):
new_collocation = Example() new_collocation = Example()
new_collocation.inner = MultiwordExample() new_collocation.inner = MultiwordExample()
new_collocation.inner.other_attributes["structureName"] = example.structure_name new_collocation.inner.other_attributes["structure_id"] = example.structure_id
new_collocation.inner.other_attributes["logDice"] = example.other["score"] new_collocation.inner.other_attributes["logDice"] = example.other["score"]
new_collocation.inner.other_attributes["frequency"] = example.frequency new_collocation.inner.other_attributes["frequency"] = example.frequency
new_collocation.inner.type = "collocation" new_collocation.inner.type = "collocation"

View File

@ -31,7 +31,7 @@ class Example(Data):
example.edited = True example.edited = True
example.inner = MultiwordExample() example.inner = MultiwordExample()
example.inner.cluster = ExampleClusters.first_empty_cluster() example.inner.cluster = ExampleClusters.first_empty_cluster()
example.inner.type = "type??" example.inner.type = "grammaticalCombination"
empty_component = ComponentLexeme() empty_component = ComponentLexeme()
empty_component.role = "headword" empty_component.role = "headword"