Fixed downloading and parsing of structures
Finish tomorrow?
This commit is contained in:
parent
887eb37d20
commit
f1f540e5da
|
@ -9,11 +9,12 @@ if [[ -z "${API_KEY}" ]]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
OUT_FILE=conversions.csv
|
OUT_FILE=conversions.csv
|
||||||
|
TEMP_FILE=$(mktemp)
|
||||||
|
|
||||||
rm -f $OUT_FILE
|
rm -r $OUT_FILE
|
||||||
|
|
||||||
for fname in glagol pridevnik prislov samostalnik; do
|
curl -s "https://gitea.cjvt.si/api/v1/repos/generic/data_admin/contents/resources/structure_conversions.csv?token=$API_KEY" -o $TEMP_FILE
|
||||||
curl -s "https://gitea.cjvt.si/api/v1/repos/redmine_projects/kolokacije/contents/resources/structure_conversions/$fname.csv?token=$API_KEY" |
|
echo "wc: $(wc $TEMP_FILE)"
|
||||||
python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d >> $OUT_FILE
|
cat $TEMP_FILE | python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d > $OUT_FILE
|
||||||
done
|
|
||||||
|
|
||||||
|
rm -rf $TEMP_FILE
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
from browser import window
|
||||||
|
|
||||||
__pragma__ ('noanno')
|
__pragma__ ('noanno')
|
||||||
__pragma__ ('js', """
|
__pragma__ ('js', """
|
||||||
var fs = require('fs');
|
var fs = require('fs');
|
||||||
|
@ -13,7 +15,7 @@ def build_structure_conversions():
|
||||||
global structure_conversions
|
global structure_conversions
|
||||||
structure_conversions = []
|
structure_conversions = []
|
||||||
|
|
||||||
structure_conversions_raw = [line.split(",") for line in conversion_csv.split("\n")]
|
structure_conversions_raw = [line.split("|") for line in conversion_csv.split("\n")]
|
||||||
for line in structure_conversions_raw:
|
for line in structure_conversions_raw:
|
||||||
if min(len(line[0]), len(line[1])) == 0:
|
if min(len(line[0]), len(line[1])) == 0:
|
||||||
continue
|
continue
|
||||||
|
@ -22,28 +24,29 @@ def build_structure_conversions():
|
||||||
if line[1] == "struktura":
|
if line[1] == "struktura":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$"
|
vto_name = line[2].strip()
|
||||||
vto = line[1].replace("<struktura>", "").replace("</struktura>", "").replace("%s", "$1").strip()
|
vto_id = line[4].strip()
|
||||||
|
|
||||||
structure_conversions.append((__new__(RegExp(vfrom, 'u')),
|
if 0 in (len(vto_name), len(vto_id)):
|
||||||
vto))
|
continue
|
||||||
|
|
||||||
|
vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$"
|
||||||
|
structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_id))
|
||||||
|
|
||||||
|
|
||||||
def convert_structure(structure):
|
def convert_structure(structure):
|
||||||
if structure_conversions is None:
|
if structure_conversions is None:
|
||||||
build_structure_conversions()
|
build_structure_conversions()
|
||||||
|
|
||||||
for vfrom, vto in structure_conversions:
|
for vfrom, vto_name, vto_id in structure_conversions:
|
||||||
match = structure.match(vfrom)
|
match = structure.match(vfrom)
|
||||||
if match:
|
if match:
|
||||||
# we need to remove replace alias here as we want to use javascript's one
|
# we need to remove replace alias here as we want to use javascript's one
|
||||||
__pragma__('noalias', 'replace')
|
__pragma__('noalias', 'replace')
|
||||||
result = structure.replace(vfrom, vto).strip()
|
result = structure.replace(vfrom, vto_name).strip()
|
||||||
__pragma__('alias', 'replace', "py_replace")
|
__pragma__('alias', 'replace', "py_replace")
|
||||||
|
|
||||||
# they said this also needs to be done - remove "-d$" from %s match
|
return result, vto_id
|
||||||
if len(match) > 1 and match[1].endswith("-d"):
|
|
||||||
result = result.replace(match[1], match[1][:-2])
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
window.console.log("Unknown structure: ", structure)
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -58,7 +58,7 @@ class SkeCollocation:
|
||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
self.word = data.word
|
self.word = data.word
|
||||||
self.frequency = data.count
|
self.frequency = data.count
|
||||||
self.structure_name = convert_structure(data.gramrel)
|
self.structure_name, self.structure_id = convert_structure(data.gramrel)
|
||||||
|
|
||||||
self.other = {"score": data.score, "cm": data.cm}
|
self.other = {"score": data.score, "cm": data.cm}
|
||||||
|
|
||||||
|
@ -242,7 +242,7 @@ class SkeInsert(DataChgClickMessage):
|
||||||
new_collocation = Example()
|
new_collocation = Example()
|
||||||
new_collocation.inner = MultiwordExample()
|
new_collocation.inner = MultiwordExample()
|
||||||
|
|
||||||
new_collocation.inner.other_attributes["structureName"] = example.structure_name
|
new_collocation.inner.other_attributes["structure_id"] = example.structure_id
|
||||||
new_collocation.inner.other_attributes["logDice"] = example.other["score"]
|
new_collocation.inner.other_attributes["logDice"] = example.other["score"]
|
||||||
new_collocation.inner.other_attributes["frequency"] = example.frequency
|
new_collocation.inner.other_attributes["frequency"] = example.frequency
|
||||||
new_collocation.inner.type = "collocation"
|
new_collocation.inner.type = "collocation"
|
||||||
|
|
|
@ -31,7 +31,7 @@ class Example(Data):
|
||||||
example.edited = True
|
example.edited = True
|
||||||
example.inner = MultiwordExample()
|
example.inner = MultiwordExample()
|
||||||
example.inner.cluster = ExampleClusters.first_empty_cluster()
|
example.inner.cluster = ExampleClusters.first_empty_cluster()
|
||||||
example.inner.type = "type??"
|
example.inner.type = "grammaticalCombination"
|
||||||
|
|
||||||
empty_component = ComponentLexeme()
|
empty_component = ComponentLexeme()
|
||||||
empty_component.role = "headword"
|
empty_component.role = "headword"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user