IssueID #1224: added initial morphological lexicon support
This commit is contained in:
parent
0e0812c551
commit
3e129eba41
55
examples/morphological_lexicon/sloleks_example.xml
Normal file
55
examples/morphological_lexicon/sloleks_example.xml
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
<lexicon>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>klop</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit id="123456" type="single">
|
||||||
|
<lexeme lexical_unit_lexeme_id="654321">klop</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>samostalnik</category>
|
||||||
|
<grammarFeature name="type">common</grammarFeature>
|
||||||
|
<grammarFeature name="gender">feminine</grammarFeature>
|
||||||
|
<grammarFeature name="pattern">Sz.1.2.o</grammarFeature>
|
||||||
|
</grammar>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">999999</measure>
|
||||||
|
</measureList>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<wordFormList>
|
||||||
|
<wordForm id="333333">
|
||||||
|
<msd language="sl" system="JOS">Sozmi</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="case">nominative</grammarFeature>
|
||||||
|
<grammarFeature name="number">plural</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentationList>
|
||||||
|
<formRepresentation id="4444444">
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">222222</measure>
|
||||||
|
</measureList>
|
||||||
|
<form>klopi</form>
|
||||||
|
</formRepresentation>
|
||||||
|
</formRepresentationList>
|
||||||
|
</wordForm>
|
||||||
|
<wordForm id="333334">
|
||||||
|
<msd language="sl" system="JOS">Sozmd</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="case">dative</grammarFeature>
|
||||||
|
<grammarFeature name="number">plural</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentationList>
|
||||||
|
<formRepresentation id="4444445">
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">11111</measure>
|
||||||
|
</measureList>
|
||||||
|
<form>klopem</form>
|
||||||
|
</formRepresentation>
|
||||||
|
</formRepresentationList>
|
||||||
|
</wordForm>
|
||||||
|
</wordFormList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</lexicon>
|
|
@ -48,6 +48,13 @@
|
||||||
</xsd:simpleContent>
|
</xsd:simpleContent>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- List of grammatical features.
     Content model: a sequence of zero or more grammarFeature elements, each of
     type grammarFeatureType. Because minOccurs is 0, an empty list is valid. -->
|
||||||
|
<xsd:complexType name="grammarFeatureListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
<!-- Lemma-disambiguating feature, more open-ended than grammarFeatureType -->
|
<!-- Lemma-disambiguating feature, more open-ended than grammarFeatureType -->
|
||||||
<xsd:complexType name="homonymyFeatureType">
|
<xsd:complexType name="homonymyFeatureType">
|
||||||
<xsd:simpleContent>
|
<xsd:simpleContent>
|
||||||
|
@ -490,4 +497,57 @@
|
||||||
</xsd:attribute>
|
</xsd:attribute>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- MSD code, in a particular language.
     Simple string content (the code itself) extended with two required string
     attributes:
       language : language in which the code is written
       system   : tag set / coding system the code belongs to
     Neither attribute is restricted to an enumeration here, so any string is
     accepted by the schema. -->
|
||||||
|
<xsd:complexType name="msdType">
|
||||||
|
<xsd:simpleContent>
|
||||||
|
<xsd:extension base="xsd:string">
|
||||||
|
<xsd:attribute name="language" type="xsd:string" use="required"/>
|
||||||
|
<xsd:attribute name="system" type="xsd:string" use="required"/>
|
||||||
|
</xsd:extension>
|
||||||
|
</xsd:simpleContent>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- Normalisation types for form representations.
     A string restricted to exactly one of three values:
     "standard", "non-standard" or "variant". Any other value fails validation. -->
|
||||||
|
<xsd:simpleType name="formNormType">
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="standard"/>
|
||||||
|
<xsd:enumeration value="non-standard"/>
|
||||||
|
<xsd:enumeration value="variant"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
|
||||||
|
<!-- Form representation wrapper.
     Children, in this fixed order (xsd:sequence):
       measureList : required (no minOccurs given, so exactly one)
       norm        : optional, one of the formNormType values
       form        : required, the form itself as a plain string
     Attribute:
       id          : required, typed xsd:int -->
|
||||||
|
<xsd:complexType name="formRepresentationType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="measureList" type="measureListType"/>
|
||||||
|
<xsd:element name="norm" type="formNormType" minOccurs="0"/>
|
||||||
|
<xsd:element name="form" type="xsd:string"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="id" type="xsd:int" use="required"/>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- List of form representations.
     Holds one or more formRepresentation elements of type
     formRepresentationType. minOccurs is not specified and therefore defaults
     to 1, so an empty list does not validate. -->
|
||||||
|
<xsd:complexType name="formRepresentationListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="formRepresentation" type="formRepresentationType" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- Word form wrapper.
     Children, in this fixed order (xsd:sequence):
       msd                    : one or more MSD codes (msdType)
       grammarFeatureList     : optional list of grammatical features
       formRepresentationList : required list of form representations
     Attribute:
       id                     : required, typed xsd:int -->
|
||||||
|
<xsd:complexType name="wordFormType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="msd" type="msdType" maxOccurs="unbounded"/>
|
||||||
|
<xsd:element name="grammarFeatureList" type="grammarFeatureListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="formRepresentationList" type="formRepresentationListType"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="id" type="xsd:int" use="required"/>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- List of word forms.
     Holds one or more wordForm elements of type wordFormType. minOccurs is not
     specified and therefore defaults to 1, so an empty list does not validate. -->
|
||||||
|
<xsd:complexType name="wordFormListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="wordForm" type="wordFormType" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
</xsd:schema>
|
</xsd:schema>
|
||||||
|
|
42
resources/schema/morphological_lexicon.xsd
Normal file
42
resources/schema/morphological_lexicon.xsd
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
|
||||||
|
|
||||||
|
<xsd:import namespace="http://www.w3.org/XML/1998/namespace"
|
||||||
|
schemaLocation="http://www.w3.org/2001/xml.xsd" />
|
||||||
|
<xsd:include schemaLocation="inventory.xsd"/>
|
||||||
|
|
||||||
|
<!-- Global "lexicon" element of the morphological lexicon schema.
     Structure:
       lexicon
         entry (zero or more)
           head (required)
             status, headword, lexicalUnit, grammar, measureList,
             relatedEntryList, labelList, comment
             - in exactly this order; only headword is required, every other
               child carries minOccurs="0"
           body (required)
             wordFormList (required)
     NOTE(review): none of the referenced named types are declared in this
     file; they are presumably supplied by the included inventory.xsd.
     Confirm against that file. -->
<xsd:element name="lexicon">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="entry" minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="head">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="status" type="statusType" minOccurs="0"/>
|
||||||
|
<xsd:element name="headword" type="headwordType"/>
|
||||||
|
<xsd:element name="lexicalUnit" type="lexicalUnitType" minOccurs="0"/>
|
||||||
|
<xsd:element name="grammar" type="grammarType" minOccurs="0"/>
|
||||||
|
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="relatedEntryList" type="relatedEntryListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="comment" type="commentType" minOccurs="0"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="body">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="wordFormList" type="wordFormListType"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
|
||||||
|
</xsd:schema>
|
Loading…
Reference in New Issue
Block a user