Add support for loading pickled structures

This commit is contained in:
Ozbolt Menegatti 2019-06-16 01:00:22 +02:00
parent f0109771aa
commit 37acabc076
2 changed files with 12 additions and 4 deletions

View File

@ -1,5 +1,6 @@
from xml.etree import ElementTree from xml.etree import ElementTree
import logging import logging
import pickle
from component import Component, ComponentType from component import Component, ComponentType
from lemma_features import get_lemma_features from lemma_features import get_lemma_features
@ -92,7 +93,12 @@ class SyntacticStructure:
return [] if matches is None else matches return [] if matches is None else matches
def build_structures(filename): def build_structures(args):
filename = args.structures
if args.pickled_structures:
with open(filename, 'rb') as fp:
return pickle.load(fp)
max_num_components = -1 max_num_components = -1
with open(filename, 'r') as fp: with open(filename, 'r') as fp:
et = ElementTree.XML(fp.read()) et = ElementTree.XML(fp.read())

View File

@ -107,8 +107,8 @@ def match_file(words, structures):
return matches return matches
def main(structures_file, args): def main(args):
structures, lemma_msds, max_num_components = build_structures(structures_file) structures, lemma_msds, max_num_components = build_structures(args)
match_store = MatchStore(args) match_store = MatchStore(args)
word_stats = WordStats(lemma_msds) word_stats = WordStats(lemma_msds)
@ -218,11 +218,13 @@ if __name__ == '__main__':
help='Tag for separators, usually pc or c', default="pc") help='Tag for separators, usually pc or c', default="pc")
parser.add_argument('--parallel', parser.add_argument('--parallel',
help='Run in multiple processes, should speed things up') help='Run in multiple processes, should speed things up')
parser.add_argument('--match-to-file', help='Do not use!') parser.add_argument('--match-to-file', help='Do not use!')
parser.add_argument('--pickled-structures', help='Do not use!', action='store_true')
args = parser.parse_args() args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
start = time.time() start = time.time()
main(args.structures, args) main(args)
logging.info("TIME: {}".format(time.time() - start)) logging.info("TIME: {}".format(time.time() - start))