IssueID #1130: added support for collocation datasets

This commit is contained in:
Cyprian Laskowski 2020-02-27 00:15:08 +01:00
parent 39b0f6e6b3
commit 41be10e153
3 changed files with 142 additions and 1 deletion

View File

@ -0,0 +1,84 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample collocation dataset containing a single entry for the headword
  "absorbirati". The entry has two senses; each sense lists syntactic
  structures, and each structure lists collocation examples made of
  numbered components (headword / other / collocate).

  The pp attribute on syntacticStructure appears to name the preposition
  that is repeated in the role="other" component of every example below.
  TODO confirm against the schema documentation.

  NOTE(review): several sloleks ids look like placeholder values. "brez"
  carries two different ids, one of which is also used for "iz", and the
  collocates "zahteve", "prebavila" and "želodca" all share one id.
  Confirm before treating this file as reference data.
-->
<dictionary>
  <entry>
    <head>
      <headword>
        <lemma>absorbirati</lemma>
      </headword>
      <lexicalUnit id="294" type="single">
        <lexeme>absorbirati</lexeme>
      </lexicalUnit>
      <grammar>
        <category>glagol</category>
      </grammar>
      <measureList>
        <measure type="frequency" source="Gigafida 1.0">3295</measure>
      </measureList>
    </head>
    <body>
      <senseList>
        <!-- Sense 1: two structures, distinguished by pp ("brez" and "iz"). -->
        <sense id="1">
          <syntacticStructureList>
            <syntacticStructure id="34" name="gbz_pp_sbz2" pp="brez">
              <exampleContainerList>
                <exampleContainer>
                  <multiwordExample type="collocation" cluster="1" frequency="5" logDice="2.23">
                    <comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
                    <comp num="2" role="other" sloleks="LE_4485e634af6a4b16093fe3140632418a">brez</comp>
                    <comp num="3" role="collocate" sloleks="LE_b66fc194167f71cb086b3c0b80820711">težave</comp>
                  </multiwordExample>
                </exampleContainer>
                <exampleContainer>
                  <multiwordExample type="collocation" cluster="1" frequency="14" logDice="11.25">
                    <comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
                    <comp num="2" role="other" sloleks="LE_ba405c6d2b3a4b0cf9f6216af3812c6b">brez</comp>
                    <comp num="3" role="collocate" sloleks="LE_274a6fc240c9e3c4c2dc3e4804a0c11f">zahteve</comp>
                  </multiwordExample>
                </exampleContainer>
              </exampleContainerList>
            </syntacticStructure>
            <syntacticStructure id="34" name="gbz_pp_sbz2" pp="iz">
              <exampleContainerList>
                <exampleContainer>
                  <multiwordExample type="collocation" cluster="2" frequency="14" logDice="11.25">
                    <comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
                    <comp num="2" role="other" sloleks="LE_ba405c6d2b3a4b0cf9f6216af3812c6b">iz</comp>
                    <comp num="3" role="collocate" sloleks="LE_274a6fc240c9e3c4c2dc3e4804a0c11f">prebavila</comp>
                  </multiwordExample>
                </exampleContainer>
                <exampleContainer>
                  <multiwordExample type="collocation" cluster="2" frequency="14" logDice="11.25">
                    <comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
                    <comp num="2" role="other" sloleks="LE_ba405c6d2b3a4b0cf9f6216af3812c6b">iz</comp>
                    <comp num="3" role="collocate" sloleks="LE_274a6fc240c9e3c4c2dc3e4804a0c11f">želodca</comp>
                  </multiwordExample>
                </exampleContainer>
              </exampleContainerList>
            </syntacticStructure>
          </syntacticStructureList>
        </sense>
        <!-- Sense 2: a single "brez" structure whose examples fall in clusters 1 and 2. -->
        <sense id="2">
          <syntacticStructureList>
            <syntacticStructure id="34" name="gbz_pp_sbz2" pp="brez">
              <exampleContainerList>
                <exampleContainer>
                  <multiwordExample type="collocation" cluster="1" frequency="5" logDice="2.23">
                    <comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
                    <comp num="2" role="other" sloleks="LE_4485e634af6a4b16093fe3140632418a">brez</comp>
                    <comp num="3" role="collocate" sloleks="LE_b66fc194167f71cb086b3c0b80820711">težave</comp>
                  </multiwordExample>
                </exampleContainer>
                <exampleContainer>
                  <multiwordExample type="collocation" cluster="2" frequency="14" logDice="11.25">
                    <comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
                    <comp num="2" role="other" sloleks="LE_ba405c6d2b3a4b0cf9f6216af3812c6b">brez</comp>
                    <comp num="3" role="collocate" sloleks="LE_274a6fc240c9e3c4c2dc3e4804a0c11f">prebavila</comp>
                  </multiwordExample>
                </exampleContainer>
              </exampleContainerList>
            </syntacticStructure>
          </syntacticStructureList>
        </sense>
      </senseList>
    </body>
  </entry>
</dictionary>

View File

@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Schema for a collocation dataset: a dictionary root holding any number of
  entry elements, each made of a head followed by a body.

  The named types referenced below (statusType, headwordType,
  lexicalUnitType, grammarType, measureListType,
  syntacticStructureListType) are not defined in this document;
  presumably they come from the included inventory.xsd.
-->
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:import namespace="http://www.w3.org/XML/1998/namespace"
              schemaLocation="http://www.w3.org/2001/xml.xsd" />
  <xsd:include schemaLocation="inventory.xsd"/>

  <xsd:element name="dictionary">
    <xsd:complexType>
      <xsd:sequence>
        <!-- A dictionary may be empty or hold any number of entries. -->
        <xsd:element name="entry" minOccurs="0" maxOccurs="unbounded">
          <xsd:complexType>
            <xsd:sequence>

              <!-- head: only headword is required; the other children are optional. -->
              <xsd:element name="head">
                <xsd:complexType>
                  <xsd:sequence>
                    <xsd:element name="status" type="statusType" minOccurs="0"/>
                    <xsd:element name="headword" type="headwordType"/>
                    <xsd:element name="lexicalUnit" type="lexicalUnitType" minOccurs="0"/>
                    <xsd:element name="grammar" type="grammarType" minOccurs="0"/>
                    <xsd:element name="measureList" type="measureListType" minOccurs="0"/>
                  </xsd:sequence>
                </xsd:complexType>
              </xsd:element>

              <!-- body: exactly one senseList, which may contain zero or more senses. -->
              <xsd:element name="body">
                <xsd:complexType>
                  <xsd:sequence>
                    <xsd:element name="senseList">
                      <xsd:complexType>
                        <xsd:sequence>
                          <xsd:element name="sense" minOccurs="0" maxOccurs="unbounded">
                            <xsd:complexType>
                              <xsd:sequence>
                                <!-- Every sense carries exactly one syntacticStructureList. -->
                                <xsd:element name="syntacticStructureList" type="syntacticStructureListType"/>
                              </xsd:sequence>
                              <!-- Optional integer identifier of the sense. -->
                              <xsd:attribute name="id" type="xsd:int"/>
                            </xsd:complexType>
                          </xsd:element>
                        </xsd:sequence>
                      </xsd:complexType>
                    </xsd:element>
                  </xsd:sequence>
                </xsd:complexType>
              </xsd:element>

            </xsd:sequence>
          </xsd:complexType>
        </xsd:element>
      </xsd:sequence>
    </xsd:complexType>
  </xsd:element>
</xsd:schema>

View File

@ -343,9 +343,14 @@
<!--
  A syntactic structure. Its content is a choice between component
  elements and exampleContainerList elements; either alternative may occur
  zero or more times, so an empty structure is also valid.
  All three attributes (id, name, pp) are optional.
-->
<xsd:complexType name="syntacticStructureType">
  <xsd:sequence>
    <xsd:choice>
      <xsd:element name="component" type="componentType" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:sequence>
  <xsd:attribute name="id" type="xsd:int"/>
  <xsd:attribute name="name" type="xsd:string"/>
  <xsd:attribute name="pp" type="xsd:string"/>
</xsd:complexType>
<xsd:complexType name="syntacticStructureListType"> <xsd:complexType name="syntacticStructureListType">