IssueID #1098: added schema support and example for MWE datasets

This commit is contained in:
Cyprian Laskowski 2020-02-27 10:27:45 +01:00
parent 0ac4b12d38
commit fd72e1c32d
4 changed files with 213 additions and 4 deletions

View File

@ -1,3 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<lexicon/>

View File

@ -0,0 +1,156 @@
<dictionary>
<!-- Entry for the multi-word expression "kdo ni po kisli juhi priplaval". -->
<entry>
<head>
<headword>
<lemma>kdo ni po kisli juhi priplaval</lemma>
</headword>
<!-- One component per token of the expression; component 1 is typed "argument" and carries no lemma/msd. -->
<lexicalUnit id="757847" type="MWE" origin="lbs_fe.1012" structure_id="1000504">
<component num="1">
<lexeme type="argument">kdo</lexeme>
</component>
<component num="2">
<lexeme lemma="biti" msd="Va-r3s-y" sloleks="">ni</lexeme>
</component>
<component num="3">
<lexeme lemma="po" msd="Sl" sloleks="">po</lexeme>
</component>
<component num="4">
<lexeme lemma="kisel" msd="Agpfsl" sloleks="">kisli</lexeme>
</component>
<component num="5">
<lexeme lemma="juha" msd="Ncfsl" sloleks="">juhi</lexeme>
</component>
<component num="6">
<lexeme lemma="priplavati" msd="Vmep-sm" sloleks="">priplaval</lexeme>
</component>
</lexicalUnit>
<measureList>
<!-- Raw frequency taken from Gigafida; the source label is spelled "GF2.0", matching the other entries of this file. -->
<measure type="raw_frequency" source="GF2.0">1</measure>
</measureList>
<relatedEntryList>
<!-- Points at the variant entry whose lexicalUnit has origin="lbs_fe.2194". -->
<relatedEntry origin="lbs_fe.2194">kdo ni po kisli župi priplaval</relatedEntry>
</relatedEntryList>
</head>
<body>
<senseList>
<sense id="317">
<definitionList/>
<exampleContainerList>
<!-- NOTE(review): the comp num values presumably refer to the component num values of the lexicalUnit above; confirm. -->
<exampleContainer>
<corpusExample exampleId="GF.111.11.11">
<comp num="1">Župani</comp>
<comp num="2">so</comp> kljub volitvam povečini na vodilna občinska mesta
<comp num="6">priplavali</comp>
<comp num="3">po</comp>
<comp num="4">kisli</comp>
<comp num="5">juhi</comp> ali župi, kot jo nezmotljivo nemčimo na
štajerskem območju. </corpusExample>
</exampleContainer>
</exampleContainerList>
</sense>
</senseList>
</body>
</entry>
<!-- Entry for the multi-word expression "kdo ni po kisli župi priplaval". -->
<entry>
<head>
<headword>
<lemma>kdo ni po kisli župi priplaval</lemma>
</headword>
<!-- One component per token of the expression; component 1 is typed "argument" and carries no lemma/msd. -->
<lexicalUnit id="3243332" type="MWE" origin="lbs_fe.2194" structure_id="1000504">
<component num="1">
<lexeme type="argument">kdo</lexeme>
</component>
<component num="2">
<lexeme lemma="biti" msd="Va-r3s-y" sloleks="">ni</lexeme>
</component>
<component num="3">
<lexeme lemma="po" msd="Sl" sloleks="">po</lexeme>
</component>
<component num="4">
<lexeme lemma="kisel" msd="Agpfsl" sloleks="">kisli</lexeme>
</component>
<component num="5">
<lexeme lemma="župa" msd="Ncfsl" sloleks="">župi</lexeme>
</component>
<component num="6">
<lexeme lemma="priplavati" msd="Vmep-sm" sloleks="">priplaval</lexeme>
</component>
</lexicalUnit>
<measureList>
<measure type="raw_frequency" source="GF2.0">1</measure>
<!-- from Gigafida -->
</measureList>
<relatedEntryList>
<!-- Points at the variant entry whose lexicalUnit has origin="lbs_fe.1012". -->
<relatedEntry origin="lbs_fe.1012">kdo ni po kisli juhi priplaval</relatedEntry>
</relatedEntryList>
</head>
<body>
<senseList>
<sense id="317">
<definitionList/>
<exampleContainerList>
<!-- NOTE(review): the comp num values presumably refer to the component num values of the lexicalUnit above; confirm. -->
<exampleContainer>
<corpusExample exampleId="GF.111.11.11" modified="false">
<comp num="1">Župani</comp>
<comp num="2">so</comp> kljub volitvam povečini na vodilna občinska mesta
<comp num="6">priplavali</comp>
<comp num="3">po</comp>
<comp num="4">kisli</comp> juhi ali
<comp num="5">župi</comp>, kot
jo nezmotljivo nemčimo na štajerskem območju. </corpusExample>
</exampleContainer>
</exampleContainerList>
</sense>
</senseList>
</body>
</entry>
<!-- Entry for the multi-word expression "kdo ni po juhi priplaval"; it has five components and no relatedEntryList. -->
<entry>
<head>
<headword>
<lemma>kdo ni po juhi priplaval</lemma>
</headword>
<lexicalUnit id="45345435" type="MWE" origin="lbs_fe.1011" structure_id="1000503">
<!-- MWE text: "kdo ni po juhi priplaval" -->
<component num="1">
<lexeme type="argument">kdo</lexeme>
</component>
<component num="2">
<lexeme lemma="biti" msd="Va-r3s-y" sloleks="">ni</lexeme>
</component>
<component num="3">
<lexeme lemma="po" msd="Sl" sloleks="">po</lexeme>
</component>
<component num="4">
<lexeme lemma="juha" msd="Ncfsl" sloleks="">juhi</lexeme>
</component>
<component num="5">
<lexeme lemma="priplavati" msd="Vmep-sm" sloleks="">priplaval</lexeme>
</component>
</lexicalUnit>
<measureList>
<measure type="raw_frequency" source="GF2.0">24</measure>
<!-- from Gigafida -->
</measureList>
</head>
<body>
<senseList>
<sense id="317">
<definitionList/>
<exampleContainerList>
<!-- NOTE(review): the comp num values presumably refer to the component num values of the lexicalUnit above; confirm. -->
<exampleContainer>
<corpusExample exampleId="GF.111.11.11">
<comp num="1">Tip</comp> tudi
<comp num="2">ni</comp>
<comp num="5">priplaval</comp>
<comp num="3">po</comp>
<comp num="4">juhi</comp> in bo hitro razumel, kaj vam gre po
glavi.</corpusExample>
</exampleContainer>
<!-- IDs in examples (translated author note; presumably a reminder about example identifiers, confirm with the author) -->
</exampleContainerList>
</sense>
</senseList>
</body>
</entry>
</dictionary>

View File

@ -60,8 +60,12 @@
<xsd:complexType name="lexemeType">
  <!-- Simple string content extended with attributes; no attribute is declared
       with use="required", so each of them may be omitted. -->
  <xsd:simpleContent>
    <xsd:extension base="xsd:string">
      <!-- The only integer-typed attribute. -->
      <xsd:attribute name="lexical_unit_lexeme_id" type="xsd:int"/>
      <!-- String-typed attributes, listed alphabetically. -->
      <xsd:attribute name="kol" type="xsd:string"/>
      <xsd:attribute name="lemma" type="xsd:string"/>
      <xsd:attribute name="msd" type="xsd:string"/>
      <xsd:attribute name="sloleks" type="xsd:string"/>
      <xsd:attribute name="type" type="xsd:string"/>
    </xsd:extension>
  </xsd:simpleContent>
</xsd:complexType>
@ -75,6 +79,8 @@
</xsd:sequence>
<xsd:attribute name="id" type="xsd:int" use="required"/>
<xsd:attribute name="type" type="xsd:string" use="required"/>
<xsd:attribute name="structure_id" type="xsd:int"/>
<xsd:attribute name="origin" type="xsd:string"/>
</xsd:complexType>
<xsd:simpleType name="categoryType">
@ -135,6 +141,7 @@
<xsd:simpleContent>
<xsd:extension base="xsd:string">
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
<xsd:attribute name="origin" type="xsd:string"/>
</xsd:extension>
</xsd:simpleContent>
</xsd:complexType>
@ -230,6 +237,7 @@
</xsd:sequence>
<xsd:attribute name="corpus_id" type="xsd:int"/>
<xsd:attribute name="example_id" type="xsd:int"/>
<xsd:attribute name="exampleId" type="xsd:string"/>
<xsd:attribute name="modified" type="xsd:boolean"/>
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
<xsd:attribute name="audio" type="xsd:string"/>

View File

@ -5,6 +5,54 @@
schemaLocation="http://www.w3.org/2001/xml.xsd" />
<xsd:include schemaLocation="inventory.xsd"/>
<xsd:element name="lexicon"/>
<!-- Root element for dictionary documents: a sequence of zero or more entry elements.
     NOTE(review): the named types referenced below (statusType, headwordType, ...) are not
     declared in this block; presumably they come from the included inventory.xsd, confirm. -->
<xsd:element name="dictionary">
<xsd:complexType>
<xsd:sequence>
<!-- Each entry is exactly one head followed by exactly one body (default occurrence of 1). -->
<xsd:element name="entry" minOccurs="0" maxOccurs="unbounded">
<xsd:complexType>
<xsd:sequence>
<!-- head: only headword is mandatory; the other children are optional but, being in an
     xsd:sequence, must appear in the order declared here. -->
<xsd:element name="head">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="status" type="statusType" minOccurs="0"/>
<xsd:element name="headword" type="headwordType"/>
<xsd:element name="lexicalUnit" type="lexicalUnitType" minOccurs="0"/>
<xsd:element name="grammar" type="grammarType" minOccurs="0"/>
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
<xsd:element name="variantList" type="variantListType" minOccurs="0"/>
<xsd:element name="relatedEntryList" type="relatedEntryListType" minOccurs="0"/>
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
<xsd:element name="comment" type="commentType" minOccurs="0"/>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
<!-- body: holds exactly one senseList. -->
<xsd:element name="body">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="senseList">
<xsd:complexType>
<xsd:sequence>
<!-- sense may repeat or be absent; all three of its children are optional, as is its int id. -->
<xsd:element name="sense" minOccurs="0" maxOccurs="unbounded">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
<xsd:element name="definitionList" type="definitionListType" minOccurs="0"/>
<xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0"/>
</xsd:sequence>
<xsd:attribute name="id" type="xsd:int"/>
</xsd:complexType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:schema>