IssueID #1098: added schema support and example for MWE datasets
This commit is contained in:
parent
0ac4b12d38
commit
fd72e1c32d
|
@ -1,3 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<lexicon/>
|
|
||||||
|
|
156
examples/mwe_lexicon/mwe_manual_example.xml
Normal file
156
examples/mwe_lexicon/mwe_manual_example.xml
Normal file
|
@ -0,0 +1,156 @@
|
||||||
|
<dictionary>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>kdo ni po kisli juhi priplaval</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit id="757847" type="MWE" origin="lbs_fe.1012" structure_id="1000504">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme type="argument">kdo</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="biti" msd="Va-r3s-y" sloleks="">ni</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="po" msd="Sl" sloleks="">po</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="kisel" msd="Agpfsl" sloleks="">kisli</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="5">
|
||||||
|
<lexeme lemma="juha" msd="Ncfsl" sloleks="">juhi</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="6">
|
||||||
|
<lexeme lemma="priplavati" msd="Vmep-sm" sloleks="">priplaval</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
<measureList>
|
||||||
|
<!-- Raw frequency recorded for this entry; the source label uses the same "GF2.0" form as the other entries in this file. -->
<measure type="raw_frequency" source="GF2.0">1</measure>
|
||||||
|
<!-- from Gigafida -->
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList>
|
||||||
|
<relatedEntry origin="lbs_fe.2194">kdo ni po kisli župi priplaval</relatedEntry>
|
||||||
|
</relatedEntryList>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList>
|
||||||
|
<sense id="317">
|
||||||
|
<definitionList/>
|
||||||
|
<exampleContainerList>
|
||||||
|
<exampleContainer>
|
||||||
|
<corpusExample exampleId="GF.111.11.11">
|
||||||
|
<comp num="1">Župani</comp>
|
||||||
|
<comp num="2">so</comp> kljub volitvam povečini na vodilna občinska mesta
|
||||||
|
<comp num="6">priplavali</comp>
|
||||||
|
<comp num="3">po</comp>
|
||||||
|
<comp num="4">kisli</comp>
|
||||||
|
<comp num="5">juhi</comp> ali župi, kot jo nezmotljivo nemčimo na
|
||||||
|
štajerskem območju. </corpusExample>
|
||||||
|
</exampleContainer>
|
||||||
|
</exampleContainerList>
|
||||||
|
</sense>
|
||||||
|
</senseList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>kdo ni po kisli župi priplaval</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit id="3243332" type="MWE" origin="lbs_fe.2194" structure_id="1000504">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme type="argument">kdo</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="biti" msd="Va-r3s-y" sloleks="">ni</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="po" msd="Sl" sloleks="">po</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="kisel" msd="Agpfsl" sloleks="">kisli</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="5">
|
||||||
|
<lexeme lemma="župa" msd="Ncfsl" sloleks="">župi</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="6">
|
||||||
|
<lexeme lemma="priplavati" msd="Vmep-sm" sloleks="">priplaval</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
<measureList>
|
||||||
|
<measure type="raw_frequency" source="GF2.0">1</measure>
|
||||||
|
<!-- from Gigafida -->
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList>
|
||||||
|
<relatedEntry origin="lbs_fe.1012">kdo ni po kisli juhi priplaval</relatedEntry>
|
||||||
|
</relatedEntryList>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList>
|
||||||
|
<sense id="317">
|
||||||
|
<definitionList/>
|
||||||
|
<exampleContainerList>
|
||||||
|
<exampleContainer>
|
||||||
|
<corpusExample exampleId="GF.111.11.11" modified="false">
|
||||||
|
<comp num="1">Župani</comp>
|
||||||
|
<comp num="2">so</comp> kljub volitvam povečini na vodilna občinska mesta
|
||||||
|
<comp num="6">priplavali</comp>
|
||||||
|
<comp num="3">po</comp>
|
||||||
|
<comp num="4">kisli</comp> juhi ali
|
||||||
|
<comp num="5">župi</comp>, kot
|
||||||
|
jo nezmotljivo nemčimo na štajerskem območju. </corpusExample>
|
||||||
|
</exampleContainer>
|
||||||
|
</exampleContainerList>
|
||||||
|
</sense>
|
||||||
|
</senseList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>kdo ni po juhi priplaval</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit id="45345435" type="MWE" origin="lbs_fe.1011" structure_id="1000503">
|
||||||
|
<!-- kdo ni po juhi priplaval -->
|
||||||
|
<component num="1">
|
||||||
|
<lexeme type="argument">kdo</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="biti" msd="Va-r3s-y" sloleks="">ni</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="po" msd="Sl" sloleks="">po</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="juha" msd="Ncfsl" sloleks="">juhi</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="5">
|
||||||
|
<lexeme lemma="priplavati" msd="Vmep-sm" sloleks="">priplaval</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
<measureList>
|
||||||
|
<measure type="raw_frequency" source="GF2.0">24</measure>
|
||||||
|
<!-- from Gigafida -->
|
||||||
|
</measureList>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList>
|
||||||
|
<sense id="317">
|
||||||
|
<definitionList/>
|
||||||
|
<exampleContainerList>
|
||||||
|
<exampleContainer>
|
||||||
|
<corpusExample exampleId="GF.111.11.11">
|
||||||
|
<comp num="1">Tip</comp> tudi
|
||||||
|
<comp num="2">ni</comp>
|
||||||
|
<comp num="5">priplaval</comp>
|
||||||
|
<comp num="3">po</comp>
|
||||||
|
<comp num="4">juhi</comp> in bo hitro razumel, kaj vam gre po
|
||||||
|
glavi.</corpusExample>
|
||||||
|
</exampleContainer>
|
||||||
|
<!-- ID in the examples -->
|
||||||
|
</exampleContainerList>
|
||||||
|
</sense>
|
||||||
|
</senseList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</dictionary>
|
|
@ -60,8 +60,12 @@
|
||||||
<!-- lexemeType: string text content extended with attributes. None of the attributes declares a "use", so all of them are optional. -->
<xsd:complexType name="lexemeType">
|
<xsd:complexType name="lexemeType">
|
||||||
<xsd:simpleContent>
|
<xsd:simpleContent>
|
||||||
<xsd:extension base="xsd:string">
|
<xsd:extension base="xsd:string">
|
||||||
|
<!-- type: free-form string; the MWE example in this change uses type="argument" on a lexeme element (presumably typed by lexemeType; confirm). -->
<xsd:attribute name="type" type="xsd:string"/>
|
||||||
<xsd:attribute name="lexical_unit_lexeme_id" type="xsd:int"/>
|
<xsd:attribute name="lexical_unit_lexeme_id" type="xsd:int"/>
|
||||||
<xsd:attribute name="sloleks" type="xsd:string"/>
|
<xsd:attribute name="sloleks" type="xsd:string"/>
|
||||||
|
<!-- NOTE(review): the meaning of "kol" is not evident from this change and it is not used in the MWE example; confirm and document. -->
<xsd:attribute name="kol" type="xsd:string"/>
|
||||||
|
<!-- lemma and msd: plain strings; the MWE example pairs them with the surface form given as text content (e.g. lemma="biti" msd="Va-r3s-y" for "ni"). -->
<xsd:attribute name="lemma" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="msd" type="xsd:string"/>
|
||||||
</xsd:extension>
|
</xsd:extension>
|
||||||
</xsd:simpleContent>
|
</xsd:simpleContent>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
@ -75,6 +79,8 @@
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="id" type="xsd:int" use="required"/>
|
<xsd:attribute name="id" type="xsd:int" use="required"/>
|
||||||
<xsd:attribute name="type" type="xsd:string" use="required"/>
|
<xsd:attribute name="type" type="xsd:string" use="required"/>
|
||||||
|
<xsd:attribute name="structure_id" type="xsd:int"/>
|
||||||
|
<xsd:attribute name="origin" type="xsd:string"/>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
<xsd:simpleType name="categoryType">
|
<xsd:simpleType name="categoryType">
|
||||||
|
@ -135,6 +141,7 @@
|
||||||
<xsd:simpleContent>
|
<xsd:simpleContent>
|
||||||
<xsd:extension base="xsd:string">
|
<xsd:extension base="xsd:string">
|
||||||
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
|
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
|
||||||
|
<xsd:attribute name="origin" type="xsd:string"/>
|
||||||
</xsd:extension>
|
</xsd:extension>
|
||||||
</xsd:simpleContent>
|
</xsd:simpleContent>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
@ -230,6 +237,7 @@
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="corpus_id" type="xsd:int"/>
|
<xsd:attribute name="corpus_id" type="xsd:int"/>
|
||||||
<xsd:attribute name="example_id" type="xsd:int"/>
|
<xsd:attribute name="example_id" type="xsd:int"/>
|
||||||
|
<xsd:attribute name="exampleId" type="xsd:string"/>
|
||||||
<xsd:attribute name="modified" type="xsd:boolean"/>
|
<xsd:attribute name="modified" type="xsd:boolean"/>
|
||||||
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
|
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
|
||||||
<xsd:attribute name="audio" type="xsd:string"/>
|
<xsd:attribute name="audio" type="xsd:string"/>
|
||||||
|
|
|
@ -5,6 +5,54 @@
|
||||||
schemaLocation="http://www.w3.org/2001/xml.xsd" />
|
schemaLocation="http://www.w3.org/2001/xml.xsd" />
|
||||||
<xsd:include schemaLocation="inventory.xsd"/>
|
<xsd:include schemaLocation="inventory.xsd"/>
|
||||||
|
|
||||||
<xsd:element name="lexicon"/>
|
<!-- Root element "dictionary": a sequence of zero or more entry elements.
     The named types referenced below (statusType, headwordType, ...) are not defined inside this element;
     presumably they are supplied by the included inventory.xsd (TODO confirm). -->
<xsd:element name="dictionary">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<!-- entry: exactly one head followed by exactly one body (neither declares minOccurs/maxOccurs, so both default to 1). -->
<xsd:element name="entry" minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<!-- head: only headword is mandatory; every other child is optional, occurs at most once, and must appear in the order declared here. -->
<xsd:element name="head">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="status" type="statusType" minOccurs="0"/>
|
||||||
|
<xsd:element name="headword" type="headwordType"/>
|
||||||
|
<xsd:element name="lexicalUnit" type="lexicalUnitType" minOccurs="0"/>
|
||||||
|
<xsd:element name="grammar" type="grammarType" minOccurs="0"/>
|
||||||
|
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="variantList" type="variantListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="relatedEntryList" type="relatedEntryListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="comment" type="commentType" minOccurs="0"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<!-- body: wraps a single mandatory senseList. -->
<xsd:element name="body">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="senseList">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<!-- sense: may be absent or repeated; each child list is optional and the id attribute is an optional integer. -->
<xsd:element name="sense" minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="definitionList" type="definitionListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="id" type="xsd:int"/>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
|
||||||
</xsd:schema>
|
</xsd:schema>
|
||||||
|
|
Loading…
Reference in New Issue
Block a user