Fixed the lemma_only bug and replaced --check-ids with --skip-id-check (ID checking is now opt-out).
This commit is contained in:
parent
5f7b5f969c
commit
8b47e2b317
14
wani.py
14
wani.py
|
@ -738,7 +738,7 @@ def is_root_id(id_):
|
|||
return len(id_.split('.')) == 3
|
||||
|
||||
|
||||
def load_corpus(filename, check_ids):
|
||||
def load_corpus(filename, skip_id_check):
|
||||
with open(filename, 'r') as fp:
|
||||
xmlstring = re.sub(' xmlns="[^"]+"', '', fp.read(), count=1)
|
||||
xmlstring = xmlstring.replace(' xml:', ' ')
|
||||
|
@ -763,7 +763,7 @@ def load_corpus(filename, check_ids):
|
|||
lfrom, dest = l.get('target').replace('#', '').split()
|
||||
|
||||
if lfrom in words:
|
||||
if check_ids and is_root_id(lfrom):
|
||||
if not skip_id_check and is_root_id(lfrom):
|
||||
logging.error("NOO: ", lfrom)
|
||||
sys.exit(1)
|
||||
|
||||
|
@ -817,7 +817,7 @@ class Writer:
|
|||
if not self.lemma_only:
|
||||
cols = [word.id, word.text] + cols + [word.msd]
|
||||
if not self.without_rep:
|
||||
cols += "" #not yet implemented...
|
||||
cols += [""] #not yet implemented...
|
||||
|
||||
return cols
|
||||
|
||||
|
@ -836,7 +836,7 @@ class Writer:
|
|||
to_write.extend(self.from_word(word))
|
||||
|
||||
# make them equal size
|
||||
to_write.extend([""] * (MAX_NUM_COMPONENTS * 5 - len(to_write)))
|
||||
to_write.extend([""] * (MAX_NUM_COMPONENTS * self.length() - len(to_write)))
|
||||
to_write = [s.id] + to_write + [colocation_ids.to_id(cid)]
|
||||
|
||||
if not self.without_rep:
|
||||
|
@ -896,7 +896,7 @@ def main(input_file, structures_file, args):
|
|||
logging.debug(str(s))
|
||||
|
||||
logging.info("LOADING TEXT...")
|
||||
words = load_corpus(input_file, args.check_ids)
|
||||
words = load_corpus(input_file, args.skip_id_check)
|
||||
|
||||
# useful for faster debugging...
|
||||
# import pickle
|
||||
|
@ -934,8 +934,8 @@ if __name__ == '__main__':
|
|||
parser.add_argument('structures', help='Structures definitions in xml file')
|
||||
parser.add_argument('--output', help='Output file (if none given, then output to stdout)')
|
||||
|
||||
parser.add_argument('--check-ids', help='Checks ids of <w> and <pc> to be in correct structure', action='store_true')
|
||||
parser.add_argument('--lemma_only', help='Will not write word ids, forms and msds in output', action='store_true')
|
||||
parser.add_argument('--skip-id-check', help='Skips checks for ids of <w> and <pc>, if they are in correct format', action='store_true')
|
||||
parser.add_argument('--lemma-only', help='Will not write word ids, forms and msds in output', action='store_true')
|
||||
parser.add_argument('--without-rep', help='Will not write representtaions in output', action='store_true')
|
||||
parser.add_argument('--group', help='Group collocations with same collocation ID', action='store_true')
|
||||
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
|
||||
|
|
Loading…
Reference in New Issue
Block a user