Added fix for punctuation
This commit is contained in:
parent
d5668c8b68
commit
1ea454f63c
|
@ -133,6 +133,7 @@ CODES = {
|
|||
"Interjection": "I",
|
||||
"Abbreviation": "Y",
|
||||
"Residual": "X",
|
||||
"Punctuation": "Z",
|
||||
|
||||
'common': 'c',
|
||||
'proper': 'p',
|
||||
|
|
|
@ -147,9 +147,6 @@ class Component:
|
|||
if not order.match(word, next_word):
|
||||
continue
|
||||
|
||||
if word.lemma == 'aktivirati' and next_word.text == 'potomcih':
|
||||
a = 0
|
||||
|
||||
match = next.match(next_word)
|
||||
|
||||
if match is not None:
|
||||
|
|
|
@ -12,7 +12,7 @@ class SyntacticStructure:
|
|||
self.components = []
|
||||
|
||||
@staticmethod
|
||||
def from_xml(xml):
|
||||
def from_xml(xml, no_stats):
|
||||
st = SyntacticStructure()
|
||||
st.id = xml.get('id_nsss')
|
||||
st.lbs = xml.get('LBS')
|
||||
|
@ -47,7 +47,8 @@ class SyntacticStructure:
|
|||
fake_root_component = Component({'cid': '#', 'type': 'other'})
|
||||
st.components = fake_root_component.find_next(deps, comps, restrs, forms)
|
||||
|
||||
st.determine_core2w()
|
||||
if not no_stats:
|
||||
st.determine_core2w()
|
||||
return st
|
||||
|
||||
def determine_core2w(self):
|
||||
|
@ -98,6 +99,7 @@ class SyntacticStructure:
|
|||
|
||||
def build_structures(args):
|
||||
filename = args.structures
|
||||
no_stats = args.out is None or args.stats is None
|
||||
|
||||
max_num_components = -1
|
||||
with open(filename, 'r') as fp:
|
||||
|
@ -105,7 +107,7 @@ def build_structures(args):
|
|||
|
||||
structures = []
|
||||
for structure in et.iter('syntactic_structure'):
|
||||
to_append = SyntacticStructure.from_xml(structure)
|
||||
to_append = SyntacticStructure.from_xml(structure, no_stats)
|
||||
if to_append is None:
|
||||
continue
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user