Removing deprecated commented out code.
parent ed83b2b9c4
commit fe4c95939f

wani.py (133 changed lines)

@@ -381,80 +381,6 @@ class ComponentRendition:

 representations[agr][0].agreement.append(rep)

-# representations = {
-#     c.idx: [[], None] if c.representation.isit(Rendition.WordForm) else [True, ""]
-#     for c in structure.components
-# }
-# found_agreements = {}
-
-# def render_form(component_id, lst, backup_word):
-#     if backup_word is not None:
-#         lst.append(backup_word)
-
-#     text_forms = {}
-#     msd_lemma_txt_triplets = Counter([(w.msd, w.lemma, w.text) for w in lst])
-#     for (msd, lemma, text), _n in reversed(msd_lemma_txt_triplets.most_common()):
-#         text_forms[(msd, lemma)] = text
-
-#     lst_ctr = []
-#     for word in lst:
-#         lst_ctr.append((word.msd, word.lemma))
-#     sorted_lst = sorted(set(lst_ctr), key=lst.count)
-
-#     for word_msd, word_lemma in sorted_lst:
-#         if component_id in found_agreements:
-#             other_component_id, other_word, agreements, other_texts = found_agreements[component_id]
-#             agr = are_agreements_ok(word_msd, other_word.lemma, other_word.msd, agreements, other_texts)
-#             if agr is None:
-#                 continue
-
-#             matches.representations[other_component_id] = agr
-
-#         if word_lemma is not None:
-#             matches.representations[component_id] = text_forms[(msd, lemma)] #word_renderer.render(word_lemma, word_msd)
-
-#         break
-
-# def are_agreements_ok(w1_msd, ow_lemma, ow_msd, agreements, ow_texts):
-#     for w2_msd, w2_txt in word_renderer.available_words(ow_lemma, ow_texts):
-#         if ow_msd[0] != w2_msd[0]:
-#             continue
-
-#         if check_agreement(w1_msd, w2_msd, agreements):
-#             return w2_txt
-
-
-# def check_agreement(msd1, msd2, agreements):
-#     for agr_case in agreements:
-#         t1 = msd1[0]
-#         # if not in msd, some strange msd was tries, skipping...
-#         if agr_case not in TAGSET[t1]:
-#             logging.warning("Cannot do agreement: {} for msd {} not found!".format(agr_case, msd1))
-#             return False
-
-#         v1 = TAGSET[t1].index(agr_case)
-#         # if none specified: nedolocnik, always agrees
-#         if v1 + 1 >= len(msd1):
-#             continue
-#         # first is uppercase, not in TAGSET
-#         m1 = msd1[v1 + 1]
-
-#         # REPEAT (not DRY!)
-#         t2 = msd2[0]
-#         if agr_case not in TAGSET[t2]:
-#             logging.warning("Cannot do agreement: {} for msd {} not found!".format(agr_case, msd2))
-#             return False
-#         v2 = TAGSET[t2].index(agr_case)
-#         if v2 + 1 >= len(msd2):
-#             continue
-#         m2 = msd2[v2 + 1]
-
-#         # match!
-#         if '-' not in [m1, m2] and m1 != m2:
-#             return False
-
-#     return True
-
 for words in matches.matches:
     # first pass, check everything but agreements
     for w_id, w in words.items():
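Note on the block deleted above: check_agreement walked the requested agreement attributes (gender, number, case, ...), found each attribute's position in the MSD string through the TAGSET table, treated '-' as a wildcard and unspecified trailing positions as agreeing. A condensed, standalone sketch of that comparison follows; the TAGSET entries and the MSD strings in the example are toy values for illustration, not the project's actual table.

import logging

# Toy TAGSET for illustration only: category letter -> ordered attribute names.
# The real table in wani.py is larger and comes from the tagset definition.
TAGSET = {
    "S": ["type", "gender", "number", "case"],            # noun (assumed)
    "P": ["type", "degree", "gender", "number", "case"],  # adjective (assumed)
}

def check_agreement(msd1, msd2, agreements):
    """True when msd1 and msd2 share a value for every requested attribute;
    '-' on either side acts as a wildcard, missing positions count as agreeing."""
    for agr_case in agreements:
        t1, t2 = msd1[0], msd2[0]
        if agr_case not in TAGSET[t1] or agr_case not in TAGSET[t2]:
            logging.warning("Cannot do agreement: %s for %s/%s not found!", agr_case, msd1, msd2)
            return False
        # attribute position inside each MSD string (index 0 is the category letter)
        v1 = TAGSET[t1].index(agr_case) + 1
        v2 = TAGSET[t2].index(agr_case) + 1
        if v1 >= len(msd1) or v2 >= len(msd2):
            continue
        m1, m2 = msd1[v1], msd2[v2]
        if "-" not in (m1, m2) and m1 != m2:
            return False
    return True

# toy example: a feminine singular nominative noun vs. a matching adjective
print(check_agreement("Sozei", "Ppnzei", ["gender", "number", "case"]))  # True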
@@ -462,43 +388,6 @@ class ComponentRendition:
 component_representations = representations[component.idx]
 for representation in component_representations:
     representation.add_word(w)

-# if rep.isit(Rendition.Lemma):
-#     representations[w_id][0] = False
-#     representations[w_id][1] = w.lemma
-# elif rep.isit(Rendition.Lexis):
-#     representations[w_id][0] = False
-#     representations[w_id][1] = rep.more
-# elif rep.isit(Rendition.Unknown):
-#     representations[w_id][0] = False
-#     representations[w_id][1] = ""
-
-# # it HAS to be word_form now
-# else:
-#     assert(rep.isit(Rendition.WordForm))
-#     wf_type, more = rep.more
-#     add = True
-
-#     if wf_type is WordFormSelection.Msd:
-#         add = check_msd(w, more)
-#         func = render_form
-#     elif wf_type is WordFormSelection.All:
-#         func = render_all
-#     elif wf_type is WordFormSelection.Any:
-#         func = render_form
-#     else:
-#         assert(wf_type is WordFormSelection.Agreement)
-#         other_w, agreements = more
-#         if other_w not in found_agreements:
-#             found_agreements[other_w] = (w_id, w, agreements, [])
-
-#         found_agreements[other_w][-1].append((w.msd, w.text))
-#         func = lambda *x: None
-
-#     representations[w_id][1] = func
-#     if add:
-#         representations[w_id][0].append(w)
-
-
 for cid, reps in representations.items():
     for rep in reps:
@@ -513,25 +402,6 @@ class ComponentRendition:
 else:
     matches.representations[cid] = " ".join(("" if r is None else r) for r in reps)

-# # just need to set representation to first group,
-# # but in correct order, agreements last!
-# representation_sorted_words = []
-# for w_id, w in matches.matches[0].items():
-#     rep = component.representation
-#     if rep.isit(Rendition.WordForm) and rep.more[0] is WordFormSelection.Agreement:
-#         representation_sorted_words.append((w_id, w))
-#     else:
-#         representation_sorted_words.insert(0, (w_id, w))
-
-# for w_id, w in representation_sorted_words:
-#     data = representations[w_id]
-#     if type(data[1]) is str:
-#         matches.representations[w_id] = None if data[0] else data[1]
-#     else:
-#         backup_msd = word_renderer.get_lemma_msd(w.lemma)
-#         backup_word = lemma_only_word(backup_msd)
-#         data[1](str(w_id), data[0], backup_word)
-
 def __str__(self):
     return str(self.rendition)

@@ -1329,8 +1199,7 @@ class ColocationIds:
 def match_file(words, structures):
     matches = {s.id: [] for s in structures}

-    for idx, s in tqdm(list(enumerate(structures))):
-        # logging.info("{}/{}: {:7s}".format(idx, len(structures), s.id))
+    for s in tqdm(structures):
         for w in words:
             mhere = s.match(w)
             logging.debug(" GOT: {}".format(len(mhere)))
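Side note on the render_form body removed in the first hunk: it kept, for every (msd, lemma) pair, the surface form observed most often, by filling a dict while iterating reversed(Counter(...).most_common()) so that more frequent triplets overwrite rarer ones. A small self-contained illustration of that trick, with made-up triplets:

from collections import Counter

# made-up (msd, lemma, text) triplets standing in for matched words
triplets = [
    ("Sozei", "miza", "miza"),
    ("Sozei", "miza", "Miza"),
    ("Sozei", "miza", "miza"),
]

text_forms = {}
# most_common() goes from frequent to rare; walking it in reverse means the
# most frequent surface form is written last and therefore wins
for (msd, lemma, text), _n in reversed(Counter(triplets).most_common()):
    text_forms[(msd, lemma)] = text

print(text_forms)  # {('Sozei', 'miza'): 'miza'}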