Added fix for punctuation

This commit is contained in:
Luka 2020-10-08 18:31:50 +02:00
parent d5668c8b68
commit 1ea454f63c
3 changed files with 6 additions and 6 deletions

View File

@ -133,6 +133,7 @@ CODES = {
"Interjection": "I", "Interjection": "I",
"Abbreviation": "Y", "Abbreviation": "Y",
"Residual": "X", "Residual": "X",
"Punctuation": "Z",
'common': 'c', 'common': 'c',
'proper': 'p', 'proper': 'p',

View File

@ -147,9 +147,6 @@ class Component:
if not order.match(word, next_word): if not order.match(word, next_word):
continue continue
if word.lemma == 'aktivirati' and next_word.text == 'potomcih':
a = 0
match = next.match(next_word) match = next.match(next_word)
if match is not None: if match is not None:

View File

@ -12,7 +12,7 @@ class SyntacticStructure:
self.components = [] self.components = []
@staticmethod @staticmethod
def from_xml(xml): def from_xml(xml, no_stats):
st = SyntacticStructure() st = SyntacticStructure()
st.id = xml.get('id_nsss') st.id = xml.get('id_nsss')
st.lbs = xml.get('LBS') st.lbs = xml.get('LBS')
@ -47,7 +47,8 @@ class SyntacticStructure:
fake_root_component = Component({'cid': '#', 'type': 'other'}) fake_root_component = Component({'cid': '#', 'type': 'other'})
st.components = fake_root_component.find_next(deps, comps, restrs, forms) st.components = fake_root_component.find_next(deps, comps, restrs, forms)
st.determine_core2w() if not no_stats:
st.determine_core2w()
return st return st
def determine_core2w(self): def determine_core2w(self):
@ -98,6 +99,7 @@ class SyntacticStructure:
def build_structures(args): def build_structures(args):
filename = args.structures filename = args.structures
no_stats = args.out is None or args.stats is None
max_num_components = -1 max_num_components = -1
with open(filename, 'r') as fp: with open(filename, 'r') as fp:
@ -105,7 +107,7 @@ def build_structures(args):
structures = [] structures = []
for structure in et.iter('syntactic_structure'): for structure in et.iter('syntactic_structure'):
to_append = SyntacticStructure.from_xml(structure) to_append = SyntacticStructure.from_xml(structure, no_stats)
if to_append is None: if to_append is None:
continue continue