From f609177d332eeeb761c023e0c49ce9dc08df1679 Mon Sep 17 00:00:00 2001
From: Ozbolt Menegatti
Date: Wed, 22 Apr 2020 14:22:02 +0200
Subject: [PATCH] structure conversions implemented, had to update makefile, gitignore etc.

---
 .gitignore                              |  2 +-
 README.md                               |  8 +++++
 build/Makefile                          |  8 +++--
 build/download_structure_conversions.sh | 19 +++++++++++
 src/lib/structure_conversions.py        | 44 +++++++++++++++++++++++++
 src/message/ske_messages.py             |  2 +-
 6 files changed, 79 insertions(+), 4 deletions(-)
 create mode 100644 README.md
 create mode 100755 build/download_structure_conversions.sh
 create mode 100644 src/lib/structure_conversions.py

diff --git a/.gitignore b/.gitignore
index fe55207..7b10cef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,7 @@ __pycache__
 
 build/*
 !build/Makefile
-!build/browserify.js
+!build/download_structure_conversions.sh
 
 # using kdev4, works fairly nicely!
 .kdev4
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8ff17f2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+# Build
+
+To build, you need Transcrypt, you need to run `npm install`, and you need an API key for gitea (`token=$API_KEY`); you can find it on gitea.cjvt.si, it is not that hard.
+
+After you have all of this, move to the `build` folder, set the `API_KEY` environment variable and then build using `make`, or use `make release` for a release (optimized JavaScript) build.
+
+For me it's `API_KEY=(cat ~/.gitea_api_key) make` (fish syntax; in bash use `API_KEY=$(cat ~/.gitea_api_key) make`).
+
diff --git a/build/Makefile b/build/Makefile
index 6011975..5113996 100644
--- a/build/Makefile
+++ b/build/Makefile
@@ -13,6 +13,7 @@ LESS_FILE=main.less
 PY_FILE=main.py
 HTML_FILE=main.html
 XML_FILE=vsms1.xml
+CONV_FILE=conversions.csv
 
 RES_LESS_FILE=$(RES_FOLDER)/$(LESS_FILE)
 RES_PY_FILE=$(SRC_FOLDER)/$(PY_FILE)
@@ -32,7 +33,7 @@ build: copy $(CSS_FILE) bundle.js local.js
 transcrypt: $(RES_PY_FILE) $(TS_BUILD)
 	transcrypt $(TS_FLAGS) $<
 
-bundle.js: transcrypt
+bundle.js: transcrypt $(CONV_FILE)
 	cd ..; browserify $(BR_FLAGS) build/$(TS_BUILD)/main.js -o build/bundle.js
 
 local.js:
@@ -50,9 +51,12 @@ $(HTML_FILE): $(RES_HTML_FILE)
 $(XML_FILE): $(RES_XML_FILE)
 	cp $(RES_XML_FILE) $@
 
+$(CONV_FILE): $(TS_BUILD)
+	./download_structure_conversions.sh
+
 copy: $(XML_FILE) $(HTML_FILE)
 
 clean:
 	rm -fr $(TS_BUILD)
-	rm -f $(HTML_FILE) $(XML_FILE) bundle.js index.html main.css local.js
+	rm -f $(HTML_FILE) $(XML_FILE) bundle.js index.html main.css local.js $(CONV_FILE)
 
diff --git a/build/download_structure_conversions.sh b/build/download_structure_conversions.sh
new file mode 100755
index 0000000..3716385
--- /dev/null
+++ b/build/download_structure_conversions.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# fail if any command fails, including any stage of a pipeline
+set -eo pipefail
+
+if [[ -z "${API_KEY}" ]]; then
+    echo "API_KEY not defined, should be api key to CJVT's gitea" >&2
+    exit 1
+fi
+
+OUT_FILE=conversions.csv
+# start clean, and never leave a partial file behind for make to treat as built
+rm -f "$OUT_FILE"
+trap 'rm -f "$OUT_FILE"' ERR
+
+for fname in glagol pridevnik prislov samostalnik; do
+    curl -sSf "https://gitea.cjvt.si/api/v1/repos/redmine_projects/kolokacije/contents/resources/structure_conversions/$fname.csv?token=$API_KEY" |
+        python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d >> "$OUT_FILE"
+done
diff --git a/src/lib/structure_conversions.py b/src/lib/structure_conversions.py
new file mode 100644
index 0000000..afce583
--- /dev/null
+++ b/src/lib/structure_conversions.py
@@ -0,0 +1,44 @@
+__pragma__ ('noanno')
+__pragma__ ('js', """
+var fs = require('fs');
+var conversion_csv = fs.readFileSync('build/conversions.csv', 'utf8');
+""", None)
+# above is magically read (browserify plugin) at compile time
+# browserify is run from root, so we need build/ in path for this to work
+
+# list of (anchored RegExp, RegExp, replacement) tuples, built on first use
+structure_conversions = None
+
+def build_structure_conversions():
+    """Parse conversion_csv into the global structure_conversions list."""
+    global structure_conversions
+    structure_conversions = []
+    structure_conversions_raw = [line.split(",") for line in conversion_csv.split("\n")]
+    for line in structure_conversions_raw:
+        # skip blank or single-column rows, rows with an empty cell, and the header
+        if len(line) < 2 or min(len(line[0]), len(line[1])) == 0:
+            continue
+        if line[1] == "struktura":
+            continue
+
+        # escape literal "?"; each %s becomes a capture group, %s in the target becomes $1
+        vfrom = line[0].replace("?", "\\?").replace("%s", "([a-zA-ZčžšČŽŠ-]+)")
+        vmatch = "^" + vfrom + "$"
+        # NOTE(review): the two replace("", "") calls are no-ops as written - confirm intent
+        vto = line[1].replace("", "").replace("", "").replace("%s", "$1").strip()
+        structure_conversions.append((__new__(RegExp(vmatch, 'u')),
+                                      __new__(RegExp(vfrom, 'u')),
+                                      vto))
+
+
+def convert_structure(structure):
+    """Return structure rewritten by the first rule matching it whole, else None."""
+    if structure_conversions is None:
+        build_structure_conversions()
+
+    for vmatch, vfrom, vto in structure_conversions:
+        if vmatch.test(structure):
+            # NOTE: this replacement was seen to misbehave at times; appending " " was tried
+            return structure.replace(vfrom, vto).strip()
+
+    return None
diff --git a/src/message/ske_messages.py b/src/message/ske_messages.py
index f780bc2..f01a515 100644
--- a/src/message/ske_messages.py
+++ b/src/message/ske_messages.py
@@ -44,7 +44,7 @@ class SkeCollocation:
     def __init__(self, data):
         self.word = data.word
         self.frequency = data.count
-        self.structure_name = data.gramrel
+        self.structure_name = convert_structure(data.gramrel)
         self.other = {"score": data.score, "cm": data.cm}
 
 