Fixed downloading and parsing of structures
Finish tomorrow?
This commit is contained in:
		
							parent
							
								
									887eb37d20
								
							
						
					
					
						commit
						f1f540e5da
					
				| @ -9,11 +9,12 @@ if [[ -z "${API_KEY}" ]]; then | ||||
| fi | ||||
| 
 | ||||
| OUT_FILE=conversions.csv | ||||
| TEMP_FILE=$(mktemp) | ||||
| 
 | ||||
| rm -f $OUT_FILE | ||||
| rm -r $OUT_FILE | ||||
| 
 | ||||
| for fname in glagol pridevnik prislov samostalnik; do | ||||
|   curl -s "https://gitea.cjvt.si/api/v1/repos/redmine_projects/kolokacije/contents/resources/structure_conversions/$fname.csv?token=$API_KEY" |  | ||||
|     python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d >> $OUT_FILE | ||||
| done | ||||
| curl -s "https://gitea.cjvt.si/api/v1/repos/generic/data_admin/contents/resources/structure_conversions.csv?token=$API_KEY" -o $TEMP_FILE | ||||
| echo "wc: $(wc $TEMP_FILE)" | ||||
| cat $TEMP_FILE | python3 -c "import sys, json; print(json.load(sys.stdin)['content'])" | base64 -d > $OUT_FILE | ||||
| 
 | ||||
| rm -rf $TEMP_FILE | ||||
|  | ||||
| @ -1,3 +1,5 @@ | ||||
| from browser import window | ||||
| 
 | ||||
| __pragma__ ('noanno') | ||||
| __pragma__ ('js', """ | ||||
| var fs = require('fs'); | ||||
| @ -13,7 +15,7 @@ def build_structure_conversions(): | ||||
|     global structure_conversions | ||||
|     structure_conversions = [] | ||||
|      | ||||
|     structure_conversions_raw = [line.split(",") for line in conversion_csv.split("\n")] | ||||
|     structure_conversions_raw = [line.split("|") for line in conversion_csv.split("\n")] | ||||
|     for line in structure_conversions_raw: | ||||
|         if min(len(line[0]), len(line[1])) == 0: | ||||
|             continue | ||||
| @ -22,28 +24,29 @@ def build_structure_conversions(): | ||||
|         if line[1] == "struktura": | ||||
|             continue | ||||
|          | ||||
|         vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$" | ||||
|         vto = line[1].replace("<struktura>", "").replace("</struktura>", "").replace("%s", "$1").strip() | ||||
|         vto_name = line[2].strip() | ||||
|         vto_id = line[4].strip() | ||||
|          | ||||
|         structure_conversions.append((__new__(RegExp(vfrom, 'u')),  | ||||
|                                       vto)) | ||||
|         if 0 in (len(vto_name), len(vto_id)): | ||||
|             continue | ||||
|          | ||||
|         vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$" | ||||
|         structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_id)) | ||||
|          | ||||
|          | ||||
| def convert_structure(structure): | ||||
|     if structure_conversions is None: | ||||
|         build_structure_conversions() | ||||
|      | ||||
|     for vfrom, vto in structure_conversions: | ||||
|     for vfrom, vto_name, vto_id in structure_conversions: | ||||
|         match = structure.match(vfrom) | ||||
|         if match: | ||||
|             # we need to remove replace alias here as we want to use javascript's one | ||||
|             __pragma__('noalias', 'replace') | ||||
|             result = structure.replace(vfrom, vto).strip() | ||||
|             result = structure.replace(vfrom, vto_name).strip() | ||||
|             __pragma__('alias', 'replace', "py_replace") | ||||
|              | ||||
|             # they said this also needs to be done - remove "-d$" from %s match | ||||
|             if len(match) > 1 and match[1].endswith("-d"): | ||||
|                 result = result.replace(match[1], match[1][:-2]) | ||||
|             return result | ||||
|             return result, vto_id | ||||
|      | ||||
|     window.console.log("Unknown structure: ", structure) | ||||
|     return None | ||||
|  | ||||
| @ -58,7 +58,7 @@ class SkeCollocation: | ||||
|     def __init__(self, data): | ||||
|         self.word = data.word | ||||
|         self.frequency = data.count | ||||
|         self.structure_name = convert_structure(data.gramrel) | ||||
|         self.structure_name, self.structure_id = convert_structure(data.gramrel) | ||||
|          | ||||
|         self.other = {"score": data.score, "cm": data.cm} | ||||
| 
 | ||||
| @ -242,7 +242,7 @@ class SkeInsert(DataChgClickMessage): | ||||
|         new_collocation = Example() | ||||
|         new_collocation.inner = MultiwordExample() | ||||
|          | ||||
|         new_collocation.inner.other_attributes["structureName"] = example.structure_name | ||||
|         new_collocation.inner.other_attributes["structure_id"] = example.structure_id | ||||
|         new_collocation.inner.other_attributes["logDice"] = example.other["score"] | ||||
|         new_collocation.inner.other_attributes["frequency"] = example.frequency | ||||
|         new_collocation.inner.type = "collocation" | ||||
|  | ||||
| @ -31,7 +31,7 @@ class Example(Data): | ||||
|         example.edited = True | ||||
|         example.inner = MultiwordExample() | ||||
|         example.inner.cluster = ExampleClusters.first_empty_cluster() | ||||
|         example.inner.type = "type??" | ||||
|         example.inner.type = "grammaticalCombination" | ||||
|          | ||||
|         empty_component = ComponentLexeme() | ||||
|         empty_component.role = "headword" | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user