IssueID #1224: added initial morphological lexicon support

This commit is contained in:
Cyprian Laskowski 2020-10-15 21:13:47 +02:00
parent 0e0812c551
commit 3e129eba41
3 changed files with 157 additions and 0 deletions

View File

@ -0,0 +1,55 @@
<!--
  Sample morphological lexicon entry: one headword ("klop") with its
  head-level grammar and frequency data, followed by two inflected word forms.
  Each wordForm carries an MSD code plus the same information spelled out as
  grammarFeature elements; the two must agree.
-->
<lexicon>
  <entry>
    <head>
      <headword>
        <lemma>klop</lemma>
      </headword>
      <lexicalUnit id="123456" type="single">
        <lexeme lexical_unit_lexeme_id="654321">klop</lexeme>
      </lexicalUnit>
      <grammar>
        <category>samostalnik</category>
        <grammarFeature name="type">common</grammarFeature>
        <grammarFeature name="gender">feminine</grammarFeature>
        <grammarFeature name="pattern">Sz.1.2.o</grammarFeature>
      </grammar>
      <measureList>
        <measure type="frequency" source="Gigafida 2.0">999999</measure>
      </measureList>
    </head>
    <body>
      <wordFormList>
        <wordForm id="333333">
          <!-- JOS noun MSD positions: category, type, gender, number, case.
               S-o-z-m-i = noun, common, feminine, plural, nominative. -->
          <msd language="sl" system="JOS">Sozmi</msd>
          <grammarFeatureList>
            <grammarFeature name="case">nominative</grammarFeature>
            <grammarFeature name="number">plural</grammarFeature>
          </grammarFeatureList>
          <formRepresentationList>
            <formRepresentation id="4444444">
              <measureList>
                <measure type="frequency" source="Gigafida 2.0">222222</measure>
              </measureList>
              <form>klopi</form>
            </formRepresentation>
          </formRepresentationList>
        </wordForm>
        <wordForm id="333334">
          <!-- S-o-z-m-d = noun, common, feminine, plural, dative. -->
          <msd language="sl" system="JOS">Sozmd</msd>
          <grammarFeatureList>
            <grammarFeature name="case">dative</grammarFeature>
            <grammarFeature name="number">plural</grammarFeature>
          </grammarFeatureList>
          <formRepresentationList>
            <formRepresentation id="4444445">
              <measureList>
                <measure type="frequency" source="Gigafida 2.0">11111</measure>
              </measureList>
              <form>klopem</form>
            </formRepresentation>
          </formRepresentationList>
        </wordForm>
      </wordFormList>
    </body>
  </entry>
</lexicon>

View File

@ -48,6 +48,13 @@
</xsd:simpleContent>
</xsd:complexType>
<!-- Container for a run of grammatical features -->
<xsd:complexType name="grammarFeatureListType">
  <xsd:annotation>
    <xsd:documentation>
      Holds any number of grammarFeature children (including none), each of
      type grammarFeatureType.
    </xsd:documentation>
  </xsd:annotation>
  <xsd:sequence>
    <xsd:element name="grammarFeature"
                 type="grammarFeatureType"
                 minOccurs="0"
                 maxOccurs="unbounded"/>
  </xsd:sequence>
</xsd:complexType>
<!-- Lemma-disambiguating feature, more open-ended than grammarFeatureType -->
<xsd:complexType name="homonymyFeatureType">
<xsd:simpleContent>
@ -490,4 +497,57 @@
</xsd:attribute>
</xsd:complexType>
<!-- MSD (morphosyntactic description) code, tagged with its language and tagset -->
<xsd:complexType name="msdType">
  <xsd:annotation>
    <xsd:documentation>
      The element text is the MSD code itself. Both attributes are mandatory
      free-form strings: "language" names the language the code is written in
      and "system" names the tagset it belongs to.
    </xsd:documentation>
  </xsd:annotation>
  <xsd:simpleContent>
    <xsd:extension base="xsd:string">
      <xsd:attribute name="language" use="required" type="xsd:string"/>
      <xsd:attribute name="system" use="required" type="xsd:string"/>
    </xsd:extension>
  </xsd:simpleContent>
</xsd:complexType>
<!-- Closed vocabulary of normalisation labels for form representations -->
<xsd:simpleType name="formNormType">
  <xsd:annotation>
    <xsd:documentation>
      String restricted to exactly three values: "standard", "non-standard"
      and "variant". Any other value fails validation.
    </xsd:documentation>
  </xsd:annotation>
  <xsd:restriction base="xsd:string">
    <xsd:enumeration value="standard"/>
    <xsd:enumeration value="non-standard"/>
    <xsd:enumeration value="variant"/>
  </xsd:restriction>
</xsd:simpleType>
<!-- One concrete representation of a word form -->
<xsd:complexType name="formRepresentationType">
  <xsd:annotation>
    <xsd:documentation>
      Children appear in fixed order: a mandatory measureList, an optional
      norm label (see formNormType), then the mandatory form string.
      The integer "id" attribute is required.
    </xsd:documentation>
  </xsd:annotation>
  <xsd:sequence>
    <xsd:element name="measureList" type="measureListType" minOccurs="1" maxOccurs="1"/>
    <xsd:element name="norm" type="formNormType" minOccurs="0" maxOccurs="1"/>
    <xsd:element name="form" type="xsd:string" minOccurs="1" maxOccurs="1"/>
  </xsd:sequence>
  <xsd:attribute name="id" use="required" type="xsd:int"/>
</xsd:complexType>
<!-- Container for the representations of a single word form -->
<xsd:complexType name="formRepresentationListType">
  <xsd:annotation>
    <xsd:documentation>
      Requires at least one formRepresentation child; there is no upper limit.
    </xsd:documentation>
  </xsd:annotation>
  <xsd:sequence>
    <xsd:element name="formRepresentation"
                 type="formRepresentationType"
                 minOccurs="1"
                 maxOccurs="unbounded"/>
  </xsd:sequence>
</xsd:complexType>
<!-- A single inflected word form of an entry -->
<xsd:complexType name="wordFormType">
  <xsd:annotation>
    <xsd:documentation>
      Children appear in fixed order: one or more msd codes, an optional
      grammarFeatureList, then a mandatory formRepresentationList.
      The integer "id" attribute is required.
    </xsd:documentation>
  </xsd:annotation>
  <xsd:sequence>
    <xsd:element name="msd" type="msdType" minOccurs="1" maxOccurs="unbounded"/>
    <xsd:element name="grammarFeatureList" type="grammarFeatureListType" minOccurs="0" maxOccurs="1"/>
    <xsd:element name="formRepresentationList" type="formRepresentationListType" minOccurs="1" maxOccurs="1"/>
  </xsd:sequence>
  <xsd:attribute name="id" use="required" type="xsd:int"/>
</xsd:complexType>
<!-- Container for all word forms of an entry -->
<xsd:complexType name="wordFormListType">
  <xsd:annotation>
    <xsd:documentation>
      Requires at least one wordForm child; there is no upper limit.
    </xsd:documentation>
  </xsd:annotation>
  <xsd:sequence>
    <xsd:element name="wordForm"
                 type="wordFormType"
                 minOccurs="1"
                 maxOccurs="unbounded"/>
  </xsd:sequence>
</xsd:complexType>
</xsd:schema>

View File

@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">

  <!-- NOTE(review): nothing in this file uses the xml: namespace directly;
       presumably the import is needed by types pulled in below. Confirm. -->
  <xsd:import namespace="http://www.w3.org/XML/1998/namespace"
              schemaLocation="http://www.w3.org/2001/xml.xsd"/>

  <!-- The named types referenced below are not declared in this file;
       presumably they all come from this include. -->
  <xsd:include schemaLocation="inventory.xsd"/>

  <!-- Document root: a lexicon is a flat list of entries and may be empty. -->
  <xsd:element name="lexicon">
    <xsd:complexType>
      <xsd:sequence>
        <xsd:element name="entry" minOccurs="0" maxOccurs="unbounded">
          <xsd:complexType>
            <xsd:sequence>

              <!-- Entry header. Only headword is mandatory; the remaining
                   children are optional but must keep this order. -->
              <xsd:element name="head">
                <xsd:complexType>
                  <xsd:sequence>
                    <xsd:element name="status" type="statusType" minOccurs="0"/>
                    <xsd:element name="headword" type="headwordType"/>
                    <xsd:element name="lexicalUnit" type="lexicalUnitType" minOccurs="0"/>
                    <xsd:element name="grammar" type="grammarType" minOccurs="0"/>
                    <xsd:element name="measureList" type="measureListType" minOccurs="0"/>
                    <xsd:element name="relatedEntryList" type="relatedEntryListType" minOccurs="0"/>
                    <xsd:element name="labelList" type="labelListType" minOccurs="0"/>
                    <xsd:element name="comment" type="commentType" minOccurs="0"/>
                  </xsd:sequence>
                </xsd:complexType>
              </xsd:element>

              <!-- Entry body: exactly one list of word forms. -->
              <xsd:element name="body">
                <xsd:complexType>
                  <xsd:sequence>
                    <xsd:element name="wordFormList" type="wordFormListType"/>
                  </xsd:sequence>
                </xsd:complexType>
              </xsd:element>

            </xsd:sequence>
          </xsd:complexType>
        </xsd:element>
      </xsd:sequence>
    </xsd:complexType>
  </xsd:element>

</xsd:schema>