diff --git a/build/Makefile b/build/Makefile index 5113996..ba3d85d 100644 --- a/build/Makefile +++ b/build/Makefile @@ -1,5 +1,3 @@ -.PHONY: all release build transcrypt copy $(RES_LESS_FILE) $(RES_PY_FILE) $(RES_HTML_FILE) $(RES_XML_FILE) - SRC_FOLDER=$(CURDIR)/../src RES_FOLDER=$(CURDIR)/../res @@ -20,6 +18,7 @@ RES_PY_FILE=$(SRC_FOLDER)/$(PY_FILE) RES_HTML_FILE=$(RES_FOLDER)/$(HTML_FILE) RES_XML_FILE=$(RES_FOLDER)/$(XML_FILE) +.PHONY: all release build transcrypt copy $(RES_LESS_FILE) $(RES_PY_FILE) $(RES_HTML_FILE) $(RES_XML_FILE) $(CONV_FILE) .DEFAULT_GOAL := all all: BR_FLAGS := $(BR_FLAGS) --debug diff --git a/build/download_structure_conversions.sh b/build/download_structure_conversions.sh index e313eb9..db8540a 100755 --- a/build/download_structure_conversions.sh +++ b/build/download_structure_conversions.sh @@ -3,15 +3,19 @@ # fail if any command fails set -e +OUT_FILE=conversions.csv + if [[ -z "${API_KEY}" ]]; then echo "API_KEY not defined, should be api key to CJVT's gitea" exit 1 fi -OUT_FILE=conversions.csv -TEMP_FILE=$(mktemp) +if [ -f $OUT_FILE ]; then + echo "file for conversion of SKE collocations: '$OUT_FILE' already exists, skipping." + exit 0 +fi -rm -r $OUT_FILE +TEMP_FILE=$(mktemp) curl -s "https://gitea.cjvt.si/api/v1/repos/generic/data_admin/contents/resources/structure_conversions.csv?token=$API_KEY" -o $TEMP_FILE echo "wc: $(wc $TEMP_FILE)" diff --git a/src/message/ske_messages.py b/src/message/ske_messages.py index 6900cdf..9769f3a 100644 --- a/src/message/ske_messages.py +++ b/src/message/ske_messages.py @@ -58,7 +58,15 @@ class SkeCollocation: def __init__(self, data): self.word = data.word self.frequency = data.count - self.structure_name, self.structure_id = convert_structure(data.gramrel) + + info = convert_structure(data.gramrel) + if info is None: + self.structure_name = None + self.structure_id = None + else: + self.structure_name = info[0] + self.structure_id = info[1] + self.other = {"score": data.score, "cm": data.cm} @@ -220,7 +228,7 @@ class SkeInsert(DataChgClickMessage): def _as_corpus_example(self, example): new_example = Example() new_example.inner = CorpusExample() - new_example.inner.other_attributes["exampleId"] = example.s_id + new_example.inner.other_attributes["example_id"] = example.s_id new_example.inner.cluster = ExampleClusters.first_empty_cluster() lex_left = ComponentLexeme() @@ -229,7 +237,7 @@ class SkeInsert(DataChgClickMessage): lex_mid = ComponentLexeme() lex_mid.text = example["mid"] - lex_mid.role = "headword" + lex_mid.role = "collocation" lex_right = ComponentLexeme() lex_right.text = example["right"] @@ -243,6 +251,7 @@ class SkeInsert(DataChgClickMessage): new_collocation.inner = MultiwordExample() new_collocation.inner.other_attributes["structure_id"] = example.structure_id + new_collocation.inner.other_attributes["structureName"] = example.structure_name new_collocation.inner.other_attributes["logDice"] = example.other["score"] new_collocation.inner.other_attributes["frequency"] = example.frequency new_collocation.inner.type = "collocation" @@ -253,7 +262,7 @@ class SkeInsert(DataChgClickMessage): lex_mid = ComponentLexeme() lex_mid.text = example.word - lex_mid.role = "headword" + lex_mid.role = "collocation" lex_right = ComponentLexeme() lex_right.text = ""