structure conversions implemented, had to update makefile, gitignore etc.

This commit is contained in:
Ozbolt Menegatti 2020-04-22 14:22:02 +02:00
parent 90a6cda9fe
commit f609177d33
6 changed files with 79 additions and 4 deletions

2
.gitignore vendored
View File

@ -4,7 +4,7 @@ __pycache__
build/*
!build/Makefile
!build/browserify.js
!build/download_structure_conversions.sh
# using kdev4, works fairly nicely!
.kdev4

8
README.md Normal file
View File

@ -0,0 +1,8 @@
# Build
To build, you need Transcrypt installed, you need to run `npm install`, and you need an API key for Gitea (it is sent as `token=$API_KEY`); you can get one on gitea.cjvt.si.
Once you have all of this, change to the `build` folder, set the `API_KEY` environment variable and run `make`; for a release (optimized JavaScript build) run `make release`.
For me it's `API_KEY=(cat ~/.gitea_api_key) make` (fish shell syntax; in bash use `API_KEY=$(cat ~/.gitea_api_key) make`).

View File

@ -13,6 +13,7 @@ LESS_FILE=main.less
PY_FILE=main.py
HTML_FILE=main.html
XML_FILE=vsms1.xml
CONV_FILE=conversions.csv
RES_LESS_FILE=$(RES_FOLDER)/$(LESS_FILE)
RES_PY_FILE=$(SRC_FOLDER)/$(PY_FILE)
@ -32,7 +33,7 @@ build: copy $(CSS_FILE) bundle.js local.js
# Compile the Python entry point with Transcrypt; $< expands to the first
# prerequisite, i.e. $(RES_PY_FILE).
transcrypt: $(RES_PY_FILE) $(TS_BUILD)
transcrypt $(TS_FLAGS) $<
# Bundle the Transcrypt output with browserify.
# browserify is run from the parent directory (hence the cd), and $(CONV_FILE)
# must already exist because the sources read it while the bundle is built.
# `&&` instead of `;` so browserify never runs from the wrong directory when
# the cd fails.
bundle.js: transcrypt $(CONV_FILE)
	cd .. && browserify $(BR_FLAGS) build/$(TS_BUILD)/main.js -o build/bundle.js
local.js:
@ -50,9 +51,12 @@ $(HTML_FILE): $(RES_HTML_FILE)
# Copy the XML resource to $(XML_FILE); $< is the single prerequisite.
$(XML_FILE): $(RES_XML_FILE)
	cp $< $@
# Download the structure-conversion CSV (the script requires API_KEY in the
# environment).
# $(TS_BUILD) is a directory, so it is an order-only prerequisite (after `|`):
# it has to exist, but its mtime changing whenever its contents change must
# not trigger a new download. The script itself is a normal prerequisite so
# that editing it refreshes the downloaded file.
$(CONV_FILE): download_structure_conversions.sh | $(TS_BUILD)
	./download_structure_conversions.sh
# Aggregate target: brings $(XML_FILE) and $(HTML_FILE) up to date.
copy: $(XML_FILE) $(HTML_FILE)
# Remove the Transcrypt build directory and every generated file, including
# the downloaded $(CONV_FILE).
clean:
	rm -fr $(TS_BUILD)
	rm -f $(HTML_FILE) $(XML_FILE) $(CONV_FILE) bundle.js index.html main.css local.js

View File

@ -0,0 +1,19 @@
#!/bin/bash
# Download the structure-conversion CSV files from CJVT's gitea and
# concatenate them into conversions.csv in the current directory.
#
# Requires the API_KEY environment variable (gitea API token).

# Fail if any command fails. pipefail extends this to every stage of a
# pipeline: without it only the exit status of the final `base64 -d` counts,
# so a failed curl or python3 would go unnoticed.
set -e
set -o pipefail

if [[ -z "${API_KEY}" ]]; then
    echo "API_KEY not defined, should be api key to CJVT's gitea" >&2
    exit 1
fi

OUT_FILE=conversions.csv
# The result is assembled in a temporary file and moved into place only after
# every download succeeded, so a failed run never leaves a partial
# conversions.csv behind that make would treat as up to date.
TMP_FILE="$OUT_FILE.tmp"
trap 'rm -f "$TMP_FILE"' EXIT

rm -f "$OUT_FILE" "$TMP_FILE"

for fname in glagol pridevnik prislov samostalnik; do
    # -f: exit non-zero on HTTP errors instead of passing the error body on;
    # -sS: no progress meter, but still print error messages.
    # The API response is JSON; its 'content' field holds the base64-encoded file.
    curl -fsS "https://gitea.cjvt.si/api/v1/repos/redmine_projects/kolokacije/contents/resources/structure_conversions/$fname.csv?token=$API_KEY" |
        python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" |
        base64 -d >> "$TMP_FILE"
done

mv -f "$TMP_FILE" "$OUT_FILE"

View File

@ -0,0 +1,44 @@
__pragma__ ('noanno')
# Inject raw JavaScript that reads the conversions CSV into `conversion_csv`.
__pragma__ ('js', """
var fs = require('fs');
var conversion_csv = fs.readFileSync('build/conversions.csv', 'utf8');
""", None)
# The readFileSync call above is resolved at compile time (by a browserify plugin).
# browserify is run from the repository root, so the path needs the build/ prefix.

# Lazily built list of conversion rules; None means "not built yet".
# Each entry is a tuple (anchored RegExp, unanchored RegExp, replacement string).
structure_conversions = None
def build_structure_conversions():
    # Parse the raw CSV text in `conversion_csv` into the module-level
    # `structure_conversions` list. Every usable row becomes a tuple
    # (anchored RegExp, unanchored RegExp, replacement string):
    #   - the anchored RegExp decides whether a structure name matches the row,
    #   - the unanchored RegExp and the replacement string do the rewrite.
    global structure_conversions
    structure_conversions = []

    for raw_line in conversion_csv.split("\n"):
        line = raw_line.split(",")

        # skip rows without two columns (e.g. the empty string left after a
        # trailing newline) before indexing into them
        if len(line) < 2:
            continue

        # skip rows where either column is empty
        if min(len(line[0]), len(line[1])) == 0:
            continue

        # header
        if line[1] == "struktura":
            continue

        # Turn the source column into a regular expression: a literal "?" is
        # escaped and every "%s" placeholder becomes a capture group.
        # NOTE(review): only "?" is escaped; other regex metacharacters in the
        # CSV would be interpreted as regex syntax - confirm the data has none.
        # NOTE(review): the non-ASCII characters in the class below look
        # mis-encoded (probably meant to be Slovenian letters) - verify
        # against the original file.
        vfrom = line[0].replace("?", "\\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)")
        vmatch = "^" + vfrom + "$"

        # Strip the <struktura> wrapper; every "%s" refers back to group 1.
        vto = line[1].replace("<struktura>", "").replace("</struktura>", "").replace("%s", "$1").strip()

        structure_conversions.append((__new__(RegExp(vmatch, 'u')),
                                      __new__(RegExp(vfrom, 'u')),
                                      vto))
def convert_structure(structure):
    # Rewrite `structure` using the first conversion rule whose anchored
    # pattern matches it. Returns the converted, stripped string, or None
    # when no rule matches. The rule list is built on first use.
    if structure_conversions is None:
        build_structure_conversions()

    for full_re, part_re, replacement in structure_conversions:
        if not full_re.test(structure):
            continue
        # NOTE: this replacement was observed to be wrong occasionally;
        # appending a trailing space to `structure` beforehand was tried as a
        # workaround and is currently disabled.
        return structure.replace(part_re, replacement).strip()

    return None

View File

@ -44,7 +44,7 @@ class SkeCollocation:
def __init__(self, data):
self.word = data.word
self.frequency = data.count
self.structure_name = data.gramrel
self.structure_name = convert_structure(data.gramrel)
self.other = {"score": data.score, "cm": data.cm}