From 6e1f3bd16a680aaecf601e4995a2119267072c99 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Wed, 8 Jul 2020 21:21:10 +0200 Subject: [PATCH 1/4] saving example_id correctly (see import of corpus example) --- src/message/ske_messages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/message/ske_messages.py b/src/message/ske_messages.py index 6900cdf..1a84b3e 100644 --- a/src/message/ske_messages.py +++ b/src/message/ske_messages.py @@ -220,7 +220,7 @@ class SkeInsert(DataChgClickMessage): def _as_corpus_example(self, example): new_example = Example() new_example.inner = CorpusExample() - new_example.inner.other_attributes["exampleId"] = example.s_id + new_example.inner.other_attributes["example_id"] = example.s_id new_example.inner.cluster = ExampleClusters.first_empty_cluster() lex_left = ComponentLexeme() From 8cb63eaa1d5d0ef74abe369000f31d5359798862 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Wed, 8 Jul 2020 21:21:24 +0200 Subject: [PATCH 2/4] mid component should be collocation, not headword --- src/message/ske_messages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/message/ske_messages.py b/src/message/ske_messages.py index 1a84b3e..0032312 100644 --- a/src/message/ske_messages.py +++ b/src/message/ske_messages.py @@ -229,7 +229,7 @@ class SkeInsert(DataChgClickMessage): lex_mid = ComponentLexeme() lex_mid.text = example["mid"] - lex_mid.role = "headword" + lex_mid.role = "collocation" lex_right = ComponentLexeme() lex_right.text = example["right"] From 5396868601ff0ac5e0a13d9fb361a59b14a04b1c Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Wed, 8 Jul 2020 21:21:24 +0200 Subject: [PATCH 3/4] mid component should be collocation, not headword, Also, re-add structureName and handle unknown structure when importing ske collocation --- src/message/ske_messages.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/message/ske_messages.py 
b/src/message/ske_messages.py index 0032312..9769f3a 100644 --- a/src/message/ske_messages.py +++ b/src/message/ske_messages.py @@ -58,7 +58,15 @@ class SkeCollocation: def __init__(self, data): self.word = data.word self.frequency = data.count - self.structure_name, self.structure_id = convert_structure(data.gramrel) + + info = convert_structure(data.gramrel) + if info is None: + self.structure_name = None + self.structure_id = None + else: + self.structure_name = info[0] + self.structure_id = info[1] + self.other = {"score": data.score, "cm": data.cm} @@ -243,6 +251,7 @@ class SkeInsert(DataChgClickMessage): new_collocation.inner = MultiwordExample() new_collocation.inner.other_attributes["structure_id"] = example.structure_id + new_collocation.inner.other_attributes["structureName"] = example.structure_name new_collocation.inner.other_attributes["logDice"] = example.other["score"] new_collocation.inner.other_attributes["frequency"] = example.frequency new_collocation.inner.type = "collocation" @@ -253,7 +262,7 @@ class SkeInsert(DataChgClickMessage): lex_mid = ComponentLexeme() lex_mid.text = example.word - lex_mid.role = "headword" + lex_mid.role = "collocation" lex_right = ComponentLexeme() lex_right.text = "" From b17510ffe7f4a1a511f5a0f5240dbc17abb218e3 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Wed, 8 Jul 2020 21:52:42 +0200 Subject: [PATCH 4/4] improving build, correct phony targets and more correct and verbose download_structure_conversions --- build/Makefile | 3 +-- build/download_structure_conversions.sh | 10 +++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/build/Makefile b/build/Makefile index 5113996..ba3d85d 100644 --- a/build/Makefile +++ b/build/Makefile @@ -1,5 +1,3 @@ -.PHONY: all release build transcrypt copy $(RES_LESS_FILE) $(RES_PY_FILE) $(RES_HTML_FILE) $(RES_XML_FILE) - SRC_FOLDER=$(CURDIR)/../src RES_FOLDER=$(CURDIR)/../res @@ -20,6 +18,7 @@ RES_PY_FILE=$(SRC_FOLDER)/$(PY_FILE) 
RES_HTML_FILE=$(RES_FOLDER)/$(HTML_FILE) RES_XML_FILE=$(RES_FOLDER)/$(XML_FILE) +.PHONY: all release build transcrypt copy $(RES_LESS_FILE) $(RES_PY_FILE) $(RES_HTML_FILE) $(RES_XML_FILE) $(CONV_FILE) .DEFAULT_GOAL := all all: BR_FLAGS := $(BR_FLAGS) --debug diff --git a/build/download_structure_conversions.sh b/build/download_structure_conversions.sh index e313eb9..db8540a 100755 --- a/build/download_structure_conversions.sh +++ b/build/download_structure_conversions.sh @@ -3,15 +3,19 @@ # fail if any command fails set -e +OUT_FILE=conversions.csv + if [[ -z "${API_KEY}" ]]; then echo "API_KEY not defined, should be api key to CJVT's gitea" exit 1 fi -OUT_FILE=conversions.csv -TEMP_FILE=$(mktemp) +if [ -f $OUT_FILE ]; then + echo "file for conversion of SKE collocations: '$OUT_FILE' already exists, skipping." + exit 0 +fi -rm -r $OUT_FILE +TEMP_FILE=$(mktemp) curl -s "https://gitea.cjvt.si/api/v1/repos/generic/data_admin/contents/resources/structure_conversions.csv?token=$API_KEY" -o $TEMP_FILE echo "wc: $(wc $TEMP_FILE)"