Removing deprecated commented-out code.

This commit is contained in:
Ozbolt Menegatti 2019-06-01 10:40:44 +02:00
parent ed83b2b9c4
commit fe4c95939f

133
wani.py
View File

@ -381,80 +381,6 @@ class ComponentRendition:
representations[agr][0].agreement.append(rep) representations[agr][0].agreement.append(rep)
# representations = {
# c.idx: [[], None] if c.representation.isit(Rendition.WordForm) else [True, ""]
# for c in structure.components
# }
# found_agreements = {}
# def render_form(component_id, lst, backup_word):
# if backup_word is not None:
# lst.append(backup_word)
# text_forms = {}
# msd_lemma_txt_triplets = Counter([(w.msd, w.lemma, w.text) for w in lst])
# for (msd, lemma, text), _n in reversed(msd_lemma_txt_triplets.most_common()):
# text_forms[(msd, lemma)] = text
# lst_ctr = []
# for word in lst:
# lst_ctr.append((word.msd, word.lemma))
# sorted_lst = sorted(set(lst_ctr), key=lst.count)
# for word_msd, word_lemma in sorted_lst:
# if component_id in found_agreements:
# other_component_id, other_word, agreements, other_texts = found_agreements[component_id]
# agr = are_agreements_ok(word_msd, other_word.lemma, other_word.msd, agreements, other_texts)
# if agr is None:
# continue
# matches.representations[other_component_id] = agr
# if word_lemma is not None:
# matches.representations[component_id] = text_forms[(msd, lemma)] #word_renderer.render(word_lemma, word_msd)
# break
# def are_agreements_ok(w1_msd, ow_lemma, ow_msd, agreements, ow_texts):
# for w2_msd, w2_txt in word_renderer.available_words(ow_lemma, ow_texts):
# if ow_msd[0] != w2_msd[0]:
# continue
# if check_agreement(w1_msd, w2_msd, agreements):
# return w2_txt
# def check_agreement(msd1, msd2, agreements):
# for agr_case in agreements:
# t1 = msd1[0]
# # if not in msd, some strange msd was tried, skipping...
# if agr_case not in TAGSET[t1]:
# logging.warning("Cannot do agreement: {} for msd {} not found!".format(agr_case, msd1))
# return False
# v1 = TAGSET[t1].index(agr_case)
# # if none specified: infinitive ("nedolocnik"), always agrees
# if v1 + 1 >= len(msd1):
# continue
# # first is uppercase, not in TAGSET
# m1 = msd1[v1 + 1]
# # REPEAT (not DRY!)
# t2 = msd2[0]
# if agr_case not in TAGSET[t2]:
# logging.warning("Cannot do agreement: {} for msd {} not found!".format(agr_case, msd2))
# return False
# v2 = TAGSET[t2].index(agr_case)
# if v2 + 1 >= len(msd2):
# continue
# m2 = msd2[v2 + 1]
# # match!
# if '-' not in [m1, m2] and m1 != m2:
# return False
# return True
for words in matches.matches: for words in matches.matches:
# first pass, check everything but agreements # first pass, check everything but agreements
for w_id, w in words.items(): for w_id, w in words.items():
@ -462,43 +388,6 @@ class ComponentRendition:
component_representations = representations[component.idx] component_representations = representations[component.idx]
for representation in component_representations: for representation in component_representations:
representation.add_word(w) representation.add_word(w)
# if rep.isit(Rendition.Lemma):
# representations[w_id][0] = False
# representations[w_id][1] = w.lemma
# elif rep.isit(Rendition.Lexis):
# representations[w_id][0] = False
# representations[w_id][1] = rep.more
# elif rep.isit(Rendition.Unknown):
# representations[w_id][0] = False
# representations[w_id][1] = ""
# # it HAS to be word_form now
# else:
# assert(rep.isit(Rendition.WordForm))
# wf_type, more = rep.more
# add = True
# if wf_type is WordFormSelection.Msd:
# add = check_msd(w, more)
# func = render_form
# elif wf_type is WordFormSelection.All:
# func = render_all
# elif wf_type is WordFormSelection.Any:
# func = render_form
# else:
# assert(wf_type is WordFormSelection.Agreement)
# other_w, agreements = more
# if other_w not in found_agreements:
# found_agreements[other_w] = (w_id, w, agreements, [])
# found_agreements[other_w][-1].append((w.msd, w.text))
# func = lambda *x: None
# representations[w_id][1] = func
# if add:
# representations[w_id][0].append(w)
for cid, reps in representations.items(): for cid, reps in representations.items():
for rep in reps: for rep in reps:
@ -513,25 +402,6 @@ class ComponentRendition:
else: else:
matches.representations[cid] = " ".join(("" if r is None else r) for r in reps) matches.representations[cid] = " ".join(("" if r is None else r) for r in reps)
# # just need to set representation to first group,
# # but in correct order, agreements last!
# representation_sorted_words = []
# for w_id, w in matches.matches[0].items():
# rep = component.representation
# if rep.isit(Rendition.WordForm) and rep.more[0] is WordFormSelection.Agreement:
# representation_sorted_words.append((w_id, w))
# else:
# representation_sorted_words.insert(0, (w_id, w))
# for w_id, w in representation_sorted_words:
# data = representations[w_id]
# if type(data[1]) is str:
# matches.representations[w_id] = None if data[0] else data[1]
# else:
# backup_msd = word_renderer.get_lemma_msd(w.lemma)
# backup_word = lemma_only_word(backup_msd)
# data[1](str(w_id), data[0], backup_word)
def __str__(self): def __str__(self):
return str(self.rendition) return str(self.rendition)
@ -1329,8 +1199,7 @@ class ColocationIds:
def match_file(words, structures): def match_file(words, structures):
matches = {s.id: [] for s in structures} matches = {s.id: [] for s in structures}
for idx, s in tqdm(list(enumerate(structures))): for s in tqdm(structures):
# logging.info("{}/{}: {:7s}".format(idx, len(structures), s.id))
for w in words: for w in words:
mhere = s.match(w) mhere = s.match(w)
logging.debug(" GOT: {}".format(len(mhere))) logging.debug(" GOT: {}".format(len(mhere)))