Fixed downloading and parsing of structures
Finish tomorrow?
This commit is contained in:
parent
887eb37d20
commit
f1f540e5da
|
@ -9,11 +9,12 @@ if [[ -z "${API_KEY}" ]]; then
|
|||
fi
|
||||
|
||||
OUT_FILE=conversions.csv
|
||||
TEMP_FILE=$(mktemp)
|
||||
|
||||
rm -f $OUT_FILE
|
||||
rm -r $OUT_FILE
|
||||
|
||||
for fname in glagol pridevnik prislov samostalnik; do
|
||||
curl -s "https://gitea.cjvt.si/api/v1/repos/redmine_projects/kolokacije/contents/resources/structure_conversions/$fname.csv?token=$API_KEY" |
|
||||
python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d >> $OUT_FILE
|
||||
done
|
||||
curl -s "https://gitea.cjvt.si/api/v1/repos/generic/data_admin/contents/resources/structure_conversions.csv?token=$API_KEY" -o $TEMP_FILE
|
||||
echo "wc: $(wc $TEMP_FILE)"
|
||||
cat $TEMP_FILE | python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d > $OUT_FILE
|
||||
|
||||
rm -rf $TEMP_FILE
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
from browser import window
|
||||
|
||||
__pragma__ ('noanno')
|
||||
__pragma__ ('js', """
|
||||
var fs = require('fs');
|
||||
|
@ -13,7 +15,7 @@ def build_structure_conversions():
|
|||
global structure_conversions
|
||||
structure_conversions = []
|
||||
|
||||
structure_conversions_raw = [line.split(",") for line in conversion_csv.split("\n")]
|
||||
structure_conversions_raw = [line.split("|") for line in conversion_csv.split("\n")]
|
||||
for line in structure_conversions_raw:
|
||||
if min(len(line[0]), len(line[1])) == 0:
|
||||
continue
|
||||
|
@ -22,28 +24,29 @@ def build_structure_conversions():
|
|||
if line[1] == "struktura":
|
||||
continue
|
||||
|
||||
vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$"
|
||||
vto = line[1].replace("<struktura>", "").replace("</struktura>", "").replace("%s", "$1").strip()
|
||||
vto_name = line[2].strip()
|
||||
vto_id = line[4].strip()
|
||||
|
||||
structure_conversions.append((__new__(RegExp(vfrom, 'u')),
|
||||
vto))
|
||||
if 0 in (len(vto_name), len(vto_id)):
|
||||
continue
|
||||
|
||||
vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$"
|
||||
structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_id))
|
||||
|
||||
|
||||
def convert_structure(structure):
|
||||
if structure_conversions is None:
|
||||
build_structure_conversions()
|
||||
|
||||
for vfrom, vto in structure_conversions:
|
||||
for vfrom, vto_name, vto_id in structure_conversions:
|
||||
match = structure.match(vfrom)
|
||||
if match:
|
||||
# we need to remove replace alias here as we want to use javascript's one
|
||||
__pragma__('noalias', 'replace')
|
||||
result = structure.replace(vfrom, vto).strip()
|
||||
result = structure.replace(vfrom, vto_name).strip()
|
||||
__pragma__('alias', 'replace', "py_replace")
|
||||
|
||||
# they said this also needs to be done - remove "-d$" from %s match
|
||||
if len(match) > 1 and match[1].endswith("-d"):
|
||||
result = result.replace(match[1], match[1][:-2])
|
||||
return result
|
||||
return result, vto_id
|
||||
|
||||
window.console.log("Unknown structure: ", structure)
|
||||
return None
|
||||
|
|
|
@ -58,7 +58,7 @@ class SkeCollocation:
|
|||
def __init__(self, data):
|
||||
self.word = data.word
|
||||
self.frequency = data.count
|
||||
self.structure_name = convert_structure(data.gramrel)
|
||||
self.structure_name, self.structure_id = convert_structure(data.gramrel)
|
||||
|
||||
self.other = {"score": data.score, "cm": data.cm}
|
||||
|
||||
|
@ -242,7 +242,7 @@ class SkeInsert(DataChgClickMessage):
|
|||
new_collocation = Example()
|
||||
new_collocation.inner = MultiwordExample()
|
||||
|
||||
new_collocation.inner.other_attributes["structureName"] = example.structure_name
|
||||
new_collocation.inner.other_attributes["structure_id"] = example.structure_id
|
||||
new_collocation.inner.other_attributes["logDice"] = example.other["score"]
|
||||
new_collocation.inner.other_attributes["frequency"] = example.frequency
|
||||
new_collocation.inner.type = "collocation"
|
||||
|
|
|
@ -31,7 +31,7 @@ class Example(Data):
|
|||
example.edited = True
|
||||
example.inner = MultiwordExample()
|
||||
example.inner.cluster = ExampleClusters.first_empty_cluster()
|
||||
example.inner.type = "type??"
|
||||
example.inner.type = "grammaticalCombination"
|
||||
|
||||
empty_component = ComponentLexeme()
|
||||
empty_component.role = "headword"
|
||||
|
|
Loading…
Reference in New Issue
Block a user