Added fix for punctuation

This commit is contained in:
Luka 2020-10-08 18:31:50 +02:00
parent d5668c8b68
commit 1ea454f63c
3 changed files with 6 additions and 6 deletions

View File

@ -133,6 +133,7 @@ CODES = {
"Interjection": "I",
"Abbreviation": "Y",
"Residual": "X",
"Punctuation": "Z",
'common': 'c',
'proper': 'p',

View File

@ -147,9 +147,6 @@ class Component:
if not order.match(word, next_word):
continue
if word.lemma == 'aktivirati' and next_word.text == 'potomcih':
a = 0
match = next.match(next_word)
if match is not None:

View File

@ -12,7 +12,7 @@ class SyntacticStructure:
self.components = []
@staticmethod
def from_xml(xml):
def from_xml(xml, no_stats):
st = SyntacticStructure()
st.id = xml.get('id_nsss')
st.lbs = xml.get('LBS')
@ -47,7 +47,8 @@ class SyntacticStructure:
fake_root_component = Component({'cid': '#', 'type': 'other'})
st.components = fake_root_component.find_next(deps, comps, restrs, forms)
st.determine_core2w()
if not no_stats:
st.determine_core2w()
return st
def determine_core2w(self):
@ -98,6 +99,7 @@ class SyntacticStructure:
def build_structures(args):
filename = args.structures
no_stats = args.out is None or args.stats is None
max_num_components = -1
with open(filename, 'r') as fp:
@ -105,7 +107,7 @@ def build_structures(args):
structures = []
for structure in et.iter('syntactic_structure'):
to_append = SyntacticStructure.from_xml(structure)
to_append = SyntacticStructure.from_xml(structure, no_stats)
if to_append is None:
continue