implementing multiple agreements to one cid.
This commit is contained in:
		
							parent
							
								
									0249ef1523
								
							
						
					
					
						commit
						ed83b2b9c4
					
				
							
								
								
									
										68
									
								
								wani.py
									
									
									
									
									
								
							
							
						
						
									
										68
									
								
								wani.py
									
									
									
									
									
								
							@ -17,7 +17,6 @@ from tqdm import tqdm
 | 
			
		||||
 | 
			
		||||
MAX_NUM_COMPONENTS = 5
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
CODES = {
 | 
			
		||||
    "Noun": "N",
 | 
			
		||||
    "Verb": "V",
 | 
			
		||||
@ -166,10 +165,10 @@ class ComponentRepresentation:
 | 
			
		||||
 | 
			
		||||
        self.words = []
 | 
			
		||||
        self.rendition_text = None
 | 
			
		||||
        self.agreement = None
 | 
			
		||||
        self.agreement = []
 | 
			
		||||
    
 | 
			
		||||
    def get_agreement(self):
 | 
			
		||||
        return None
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
    def add_word(self, word):
 | 
			
		||||
        self.words.append(word)
 | 
			
		||||
@ -210,12 +209,17 @@ class WordFormAnyCR(ComponentRepresentation):
 | 
			
		||||
        sorted_words = sorted(set(words_counter), key=lambda x: -words_counter.count(x))
 | 
			
		||||
 | 
			
		||||
        for word_msd, word_lemma in sorted_words:
 | 
			
		||||
            if self.agreement is not None:
 | 
			
		||||
                if self.agreement.match(word_msd):
 | 
			
		||||
                    if word_lemma is None:
 | 
			
		||||
                        return None
 | 
			
		||||
                    else:
 | 
			
		||||
                        return text_forms[(word_msd, word_lemma)]
 | 
			
		||||
            for agr in self.agreement:
 | 
			
		||||
                if not agr.match(word_msd):
 | 
			
		||||
                    break
 | 
			
		||||
            else:
 | 
			
		||||
                for agr in self.agreement:
 | 
			
		||||
                    agr.confirm_match()
 | 
			
		||||
 | 
			
		||||
                if word_lemma is None:
 | 
			
		||||
                    return None
 | 
			
		||||
                else:
 | 
			
		||||
                    return text_forms[(word_msd, word_lemma)]
 | 
			
		||||
        
 | 
			
		||||
class WordFormMsdCR(WordFormAnyCR):
 | 
			
		||||
    def __init__(self, *args):
 | 
			
		||||
@ -253,6 +257,7 @@ class WordFormAgreementCR(ComponentRepresentation):
 | 
			
		||||
    def __init__(self, data, word_renderer):
 | 
			
		||||
        super().__init__(data, word_renderer)
 | 
			
		||||
        self.agree_with, self.data = self.data
 | 
			
		||||
        self.rendition_candidate = None
 | 
			
		||||
    
 | 
			
		||||
    def get_agreement(self):
 | 
			
		||||
        return self.agree_with
 | 
			
		||||
@ -269,10 +274,13 @@ class WordFormAgreementCR(ComponentRepresentation):
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, agreements):
 | 
			
		||||
                self.rendition_text = candidate_text
 | 
			
		||||
                self.rendition_candidate = candidate_text
 | 
			
		||||
                return True
 | 
			
		||||
 | 
			
		||||
        return False
 | 
			
		||||
    
 | 
			
		||||
    def confirm_match(self):
 | 
			
		||||
        self.rendition_text = self.rendition_candidate
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def check_agreement(msd1, msd2, agreements):
 | 
			
		||||
@ -361,20 +369,17 @@ class ComponentRendition:
 | 
			
		||||
        
 | 
			
		||||
        for cid, reps in representations.items():
 | 
			
		||||
            for rep in reps:
 | 
			
		||||
                agr = rep.get_agreement()
 | 
			
		||||
                if agr is None:
 | 
			
		||||
                    continue
 | 
			
		||||
                for agr in rep.get_agreement():
 | 
			
		||||
                    if len(representations[agr]) != 1:
 | 
			
		||||
                        n = len(representations[agr])
 | 
			
		||||
                        raise NotImplementedError(
 | 
			
		||||
                            "Structure {}: ".format(structure.id) +
 | 
			
		||||
                            "component {} has agreement".format(cid) +
 | 
			
		||||
                            " with component {}".format(agr) +
 | 
			
		||||
                            ", however there are {} (!= 1) representations".format(n) +
 | 
			
		||||
                            " of component {}!".format(agr))
 | 
			
		||||
 | 
			
		||||
                if len(representations[agr]) != 1:
 | 
			
		||||
                    n = len(representations[agr])
 | 
			
		||||
                    raise NotImplementedError(
 | 
			
		||||
                        "Structure {}: ".format(structure.id) +
 | 
			
		||||
                        "component {} has agreement".format(cid) +
 | 
			
		||||
                        " with component {}".format(agr) +
 | 
			
		||||
                        ", however there are {} (!= 1) representations".format(n) +
 | 
			
		||||
                        " of component {}!".format(agr))
 | 
			
		||||
 | 
			
		||||
                representations[agr][0].agreement = rep
 | 
			
		||||
                    representations[agr][0].agreement.append(rep)
 | 
			
		||||
 | 
			
		||||
        # representations = {
 | 
			
		||||
        #     c.idx: [[], None] if c.representation.isit(Rendition.WordForm) else [True, ""]
 | 
			
		||||
@ -494,6 +499,7 @@ class ComponentRendition:
 | 
			
		||||
                #     if add:
 | 
			
		||||
                #         representations[w_id][0].append(w)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        for cid, reps in representations.items():
 | 
			
		||||
            for rep in reps:
 | 
			
		||||
                rep.render()
 | 
			
		||||
@ -1178,16 +1184,17 @@ class Writer:
 | 
			
		||||
    def length(self):
 | 
			
		||||
        return 4 if self.all else 3
 | 
			
		||||
 | 
			
		||||
    def from_word(self, word, representation):
 | 
			
		||||
    def from_word(self, word, representation, rep_exists):
 | 
			
		||||
        if word is None:
 | 
			
		||||
            return [""] * self.length()
 | 
			
		||||
        elif self.all:
 | 
			
		||||
            return [word.id, word.text, word.lemma, word.msd]
 | 
			
		||||
        elif not rep_exists:
 | 
			
		||||
            return [word.lemma, "", ""]
 | 
			
		||||
        elif representation is None:
 | 
			
		||||
            return [word.lemma, word.lemma, "lemma_fallback"]
 | 
			
		||||
        else:
 | 
			
		||||
            if representation is None:
 | 
			
		||||
                return [word.lemma, word.lemma, "lemma_fallback"]
 | 
			
		||||
            else:
 | 
			
		||||
                return [word.lemma, representation, "ok"]
 | 
			
		||||
            return [word.lemma, representation, "ok"]
 | 
			
		||||
    
 | 
			
		||||
    def sorted_rows(self, rows):
 | 
			
		||||
        if self.sort_by < 0 or len(rows) < 2:
 | 
			
		||||
@ -1218,8 +1225,9 @@ class Writer:
 | 
			
		||||
            for idx, _comp in enumerate(components):
 | 
			
		||||
                idx = str(idx + 1)
 | 
			
		||||
                word = m[idx] if idx in m else None
 | 
			
		||||
                rep = rprsnt[idx] if idx in rprsnt else None
 | 
			
		||||
                to_write.extend(self.from_word(word, rep))
 | 
			
		||||
                rep_exists = idx in rprsnt
 | 
			
		||||
                rep = rprsnt[idx] if rep_exists else None
 | 
			
		||||
                to_write.extend(self.from_word(word, rep, rep_exists))
 | 
			
		||||
                representation += " " + to_write[-2]
 | 
			
		||||
 | 
			
		||||
            # make them equal size
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user