better collocation export
This commit is contained in:
parent
8802a09c9c
commit
19d3e38dcb
|
@ -24,6 +24,7 @@ def build_structure_conversions():
|
||||||
if line[1] == "struktura":
|
if line[1] == "struktura":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
vto_structure = line[1].strip().split(">")[1].split("<")[0]
|
||||||
vto_name = line[2].strip()
|
vto_name = line[2].strip()
|
||||||
vto_id = line[4].strip()
|
vto_id = line[4].strip()
|
||||||
|
|
||||||
|
@ -31,14 +32,14 @@ def build_structure_conversions():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$"
|
vfrom = "^" + line[0].replace("?", "\?").replace("%s", "([a-zA-Z螚ȎŠ-]+)") + "$"
|
||||||
structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_id))
|
structure_conversions.append((__new__(RegExp(vfrom, 'u')), vto_name, vto_structure, vto_id))
|
||||||
|
|
||||||
|
|
||||||
def convert_structure(structure, type):
|
def convert_structure(structure, type):
|
||||||
if structure_conversions is None:
|
if structure_conversions is None:
|
||||||
build_structure_conversions()
|
build_structure_conversions()
|
||||||
|
|
||||||
for vfrom, vto_name, vto_id in structure_conversions:
|
for vfrom, vto_name, vto_structure, vto_id in structure_conversions:
|
||||||
match = structure.match(vfrom)
|
match = structure.match(vfrom)
|
||||||
# fix for ids 65, 66 and 67, which would otherwise be matched as id 64
|
# fix for ids 65, 66 and 67, which would otherwise be matched as id 64
|
||||||
if match and vto_id == '64' and '-s' in type:
|
if match and vto_id == '64' and '-s' in type:
|
||||||
|
@ -57,7 +58,7 @@ def convert_structure(structure, type):
|
||||||
result = structure.replace(vfrom, vto_name).strip()
|
result = structure.replace(vfrom, vto_name).strip()
|
||||||
__pragma__('alias', 'replace', "py_replace")
|
__pragma__('alias', 'replace', "py_replace")
|
||||||
|
|
||||||
return result, vto_id
|
return result, vto_structure, vto_id
|
||||||
|
|
||||||
window.console.log("Unknown structure: ", structure)
|
window.console.log("Unknown structure: ", structure)
|
||||||
return 'N/A', '/'
|
return 'N/A', '/'
|
||||||
|
|
|
@ -59,7 +59,8 @@ class SkeCollocation:
|
||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
self.word = data.word
|
self.word = data.word
|
||||||
self.frequency = data.count
|
self.frequency = data.count
|
||||||
self.structure_name, self.structure_id = convert_structure(data.gramrel, data.lempos)
|
self.gramrel = data.gramrel
|
||||||
|
self.structure_name, self.structure, self.structure_id = convert_structure(data.gramrel, data.lempos)
|
||||||
|
|
||||||
self.other = {"score": data.score, "cm": data.cm}
|
self.other = {"score": data.score, "cm": data.cm}
|
||||||
|
|
||||||
|
@ -145,9 +146,21 @@ class SkeModal(ClickMessage):
|
||||||
# we have the data; now we have to match it against the data available from our gf2 examples API
|
# we have the data; now we have to match it against the data available from our gf2 examples API
|
||||||
match_gf2_examples(data, page_num, search_term, ske_index)
|
match_gf2_examples(data, page_num, search_term, ske_index)
|
||||||
elif type(data[0]) is SkeCollocation:
|
elif type(data[0]) is SkeCollocation:
|
||||||
data.sort(key= lambda x: x.structure_name)
|
# filtering, grouping and sorting data
|
||||||
data = [x for x in data if x.structure_name is not "N/A"]
|
data.sort(key= lambda x: float(x.other["score"]), reverse=True)
|
||||||
|
_data = []
|
||||||
|
while len(data) > 0:
|
||||||
|
max_item = data.pop(0) # max(data, key= lambda x: x.other["score"])
|
||||||
|
_data.append(max_item)
|
||||||
|
for item in data:
|
||||||
|
if "N/A" in item.structure_name:
|
||||||
|
data.remove(item)
|
||||||
|
elif item.structure_name.strip() == max_item.structure_name.strip():
|
||||||
|
_data.append(item)
|
||||||
|
for delete_item in _data:
|
||||||
|
if delete_item in data:
|
||||||
|
data.remove(delete_item)
|
||||||
|
data = _data
|
||||||
model.modal_set(lambda: modals.ske_list(
|
model.modal_set(lambda: modals.ske_list(
|
||||||
search_term, data, page_num, model.entry.senses, model.ske.request_kinds))
|
search_term, data, page_num, model.entry.senses, model.ske.request_kinds))
|
||||||
|
|
||||||
|
@ -260,17 +273,47 @@ class SkeInsert(DataChgClickMessage):
|
||||||
new_collocation.inner.other_attributes["frequency"] = example.frequency
|
new_collocation.inner.other_attributes["frequency"] = example.frequency
|
||||||
new_collocation.inner.type = "collocation"
|
new_collocation.inner.type = "collocation"
|
||||||
|
|
||||||
lex_left = ComponentLexeme()
|
headword = document.getElementById("ske-search").value
|
||||||
lex_left.text = ""
|
lexemes = []
|
||||||
lex_left.role = None
|
structure_name = example.structure_name.split("-")
|
||||||
|
gramrel = example.gramrel.split("_")
|
||||||
|
structure = example.structure.split(" ")
|
||||||
|
structure.append("") # Bad fix: we have to add something for structure l-gg-ggn
|
||||||
|
|
||||||
lex_mid = ComponentLexeme()
|
for i in range(len(structure_name)):
|
||||||
lex_mid.text = example.word
|
console.log("current:", i, "\ngramrel:", gramrel[i], "\nstructure:", structure[i], "\nstruc_name:", structure_name[i])
|
||||||
lex_mid.role = "collocate"
|
|
||||||
|
|
||||||
lex_right = ComponentLexeme()
|
lex = ComponentLexeme()
|
||||||
lex_right.text = ""
|
structure[i] = structure[i].replace("Inf-", "")
|
||||||
lex_right.role = None
|
|
||||||
|
|
||||||
new_collocation.components.extend([lex_left, lex_mid, lex_right])
|
# take care of negations "ne"
|
||||||
|
if "Neg-" in structure[i]:
|
||||||
|
structure[i] = structure[i].replace("Neg-", "")
|
||||||
|
negation_flag = True
|
||||||
|
n_lex = ComponentLexeme()
|
||||||
|
n_lex.text = "ne"
|
||||||
|
n_lex.role = "other"
|
||||||
|
lexemes.append(n_lex)
|
||||||
|
|
||||||
|
if structure[i] is "":
|
||||||
|
continue # skipping bcs of fix
|
||||||
|
elif "Vez-gbz" in structure[i]:
|
||||||
|
lex.text = "je"
|
||||||
|
lex.role = "other"
|
||||||
|
elif structure_name[i] in ["d", "vd", "zp"]:
|
||||||
|
lex.text = gramrel[i]
|
||||||
|
lex.text = lex.text.replace("-d", "").replace("%", "")
|
||||||
|
lex.role = "other"
|
||||||
|
elif structure_name[i] is "vp":
|
||||||
|
lex.text = structure[i]
|
||||||
|
lex.role = "other"
|
||||||
|
elif structure[i][0] in ["S", "G", "P", "R"]:
|
||||||
|
lex.text = headword
|
||||||
|
lex.role = "headword"
|
||||||
|
else:
|
||||||
|
lex.text = example.word
|
||||||
|
lex.role = "collocate"
|
||||||
|
lexemes.append(lex)
|
||||||
|
|
||||||
|
new_collocation.components.extend(lexemes)
|
||||||
return new_collocation
|
return new_collocation
|
||||||
|
|
Loading…
Reference in New Issue
Block a user