Redmine #2619: more specific cordex-related tweaks
This commit is contained in:
parent
f62f4d3c5f
commit
4c1cc2112c
|
@ -50,7 +50,7 @@ def get_mwe_index_map(mapper_file_name):
|
||||||
def get_mwe_structure_map(csv_file_name, mwe_components_map, mwe_index_map, structure_components_map):
|
def get_mwe_structure_map(csv_file_name, mwe_components_map, mwe_index_map, structure_components_map):
|
||||||
mwe_structure_map = defaultdict(set)
|
mwe_structure_map = defaultdict(set)
|
||||||
csv_file = codecs.open(csv_file_name, 'r')
|
csv_file = codecs.open(csv_file_name, 'r')
|
||||||
reader = csv.DictReader(csv_file, delimiter=',')
|
reader = csv.DictReader(csv_file, delimiter='\t')
|
||||||
for row in reader:
|
for row in reader:
|
||||||
structure_id = int(row['Structure_ID'])
|
structure_id = int(row['Structure_ID'])
|
||||||
collocation_id = int(row['Collocation_ID'])
|
collocation_id = int(row['Collocation_ID'])
|
||||||
|
|
|
@ -8,7 +8,7 @@ FILE_MAP = {'strings-list': 'strings.txt',
|
||||||
'tei-ids-collocation': 'tei_ids_collocations.xml',
|
'tei-ids-collocation': 'tei_ids_collocations.xml',
|
||||||
'tei-ids-all': 'tei_ids_all.xml',
|
'tei-ids-all': 'tei_ids_all.xml',
|
||||||
'collocations': 'collocation_matches.csv',
|
'collocations': 'collocation_matches.csv',
|
||||||
'collocation-mapper': 'mapper.txt',
|
'collocation-mapper': 'mapper.csv',
|
||||||
'structures-old': 'structures_old.xml',
|
'structures-old': 'structures_old.xml',
|
||||||
'structures-new': 'structures_new.xml',
|
'structures-new': 'structures_new.xml',
|
||||||
'dictionary': 'dictionary.xml',
|
'dictionary': 'dictionary.xml',
|
||||||
|
|
|
@ -204,8 +204,9 @@ class Pipeline:
|
||||||
structure_file_name = self.file_map['structures-old']
|
structure_file_name = self.file_map['structures-old']
|
||||||
input_file_name = self.file_map['tei-initial']
|
input_file_name = self.file_map['tei-initial']
|
||||||
output_file_name = self.file_map['collocations']
|
output_file_name = self.file_map['collocations']
|
||||||
|
mapper_file_name = self.file_map['collocation-mapper']
|
||||||
|
|
||||||
extractor = cordex.Pipeline(structure_file_name, fixed_restriction_order=True, statistics=False, collocation_sentence_map_dest=self.tmp_directory)
|
extractor = cordex.Pipeline(structure_file_name, fixed_restriction_order=True, statistics=False, collocation_sentence_map_dest=mapper_file_name)
|
||||||
extraction = extractor(input_file_name)
|
extraction = extractor(input_file_name)
|
||||||
extraction.write(output_file_name)
|
extraction.write(output_file_name)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user