diff --git a/structure_assignment/assign_collocation_structures.py b/structure_assignment/assign_collocation_structures.py index c153d78..54aba93 100644 --- a/structure_assignment/assign_collocation_structures.py +++ b/structure_assignment/assign_collocation_structures.py @@ -50,7 +50,7 @@ def get_mwe_index_map(mapper_file_name): def get_mwe_structure_map(csv_file_name, mwe_components_map, mwe_index_map, structure_components_map): mwe_structure_map = defaultdict(set) csv_file = codecs.open(csv_file_name, 'r') - reader = csv.DictReader(csv_file, delimiter=',') + reader = csv.DictReader(csv_file, delimiter='\t') for row in reader: structure_id = int(row['Structure_ID']) collocation_id = int(row['Collocation_ID']) diff --git a/structure_assignment/constants.py b/structure_assignment/constants.py index 336aaa9..8d1b780 100644 --- a/structure_assignment/constants.py +++ b/structure_assignment/constants.py @@ -8,7 +8,7 @@ FILE_MAP = {'strings-list': 'strings.txt', 'tei-ids-collocation': 'tei_ids_collocations.xml', 'tei-ids-all': 'tei_ids_all.xml', 'collocations': 'collocation_matches.csv', - 'collocation-mapper': 'mapper.txt', + 'collocation-mapper': 'mapper.csv', 'structures-old': 'structures_old.xml', 'structures-new': 'structures_new.xml', 'dictionary': 'dictionary.xml', diff --git a/structure_assignment/pipeline.py b/structure_assignment/pipeline.py index 8e9ae51..a62a403 100644 --- a/structure_assignment/pipeline.py +++ b/structure_assignment/pipeline.py @@ -204,8 +204,9 @@ class Pipeline: structure_file_name = self.file_map['structures-old'] input_file_name = self.file_map['tei-initial'] output_file_name = self.file_map['collocations'] + mapper_file_name = self.file_map['collocation-mapper'] - extractor = cordex.Pipeline(structure_file_name, fixed_restriction_order=True, statistics=False, collocation_sentence_map_dest=self.tmp_directory) + extractor = cordex.Pipeline(structure_file_name, fixed_restriction_order=True, statistics=False, collocation_sentence_map_dest=mapper_file_name) extraction = extractor(input_file_name) extraction.write(output_file_name)