Compare commits
40 Commits
Author | SHA1 | Date | |
---|---|---|---|
3d65ea1a4e | |||
ff77999271 | |||
7a5603f9f9 | |||
2c9d3d6868 | |||
85a7d9f7f2 | |||
c86ccd1401 | |||
fb2cf29d23 | |||
eba09aa98e | |||
7ea24ae5c4 | |||
9bf9a696f6 | |||
652e2e92b5 | |||
d16948f6ae | |||
7b29c6da98 | |||
76c406dffc | |||
13791bdabe | |||
3a4097c6db | |||
3fcc821d3f | |||
35c1c2e135 | |||
6822bf61ef | |||
140f18ff88 | |||
006b553eda | |||
cf2126fa08 | |||
4053f5f2a2 | |||
e036e097ac | |||
35914e758d | |||
15b955776f | |||
977e1f4d0a | |||
9d362a96ef | |||
95810330e0 | |||
9b8de239a5 | |||
b3d30f3dd4 | |||
68f5d8cbd5 | |||
8a1c1bc812 | |||
3e129eba41 | |||
0e0812c551 | |||
e5d057ae6f | |||
7042ded725 | |||
246e1ab1d6 | |||
07f5cd047f | |||
a58249b67e |
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -0,0 +1 @@
|
||||||
|
tmp
|
2623
examples/bilingual_dictionaries/vsms-sample.xml
Normal file
2623
examples/bilingual_dictionaries/vsms-sample.xml
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -61,7 +61,7 @@
|
||||||
<label type="type3">value7</label>
|
<label type="type3">value7</label>
|
||||||
</labelList>
|
</labelList>
|
||||||
<translation targetLang="hun" source="source1">hiša</translation>
|
<translation targetLang="hun" source="source1">hiša</translation>
|
||||||
<explanation/>
|
<explanationList/>
|
||||||
</translationContainer>
|
</translationContainer>
|
||||||
<translationContainer cluster="1">
|
<translationContainer cluster="1">
|
||||||
<translation targetLang="hun">dom</translation>
|
<translation targetLang="hun">dom</translation>
|
||||||
|
@ -71,12 +71,14 @@
|
||||||
<exampleContainerList>
|
<exampleContainerList>
|
||||||
<exampleContainer>
|
<exampleContainer>
|
||||||
<corpusExample example_id="4343" modified="true" audio="audio-465346">Kupili smo <comp role="headword">hišo</comp>.</corpusExample>
|
<corpusExample example_id="4343" modified="true" audio="audio-465346">Kupili smo <comp role="headword">hišo</comp>.</corpusExample>
|
||||||
|
<translationContainerList>
|
||||||
<translationContainer>
|
<translationContainer>
|
||||||
<labelList>
|
<labelList>
|
||||||
<label type="type5">value8</label>
|
<label type="type5">value8</label>
|
||||||
</labelList>
|
</labelList>
|
||||||
<translation targetLang="hun">Blah blah blah 1.</translation>
|
<translation targetLang="hun">Blah blah blah 1.</translation>
|
||||||
</translationContainer>
|
</translationContainer>
|
||||||
|
</translationContainerList>
|
||||||
</exampleContainer>
|
</exampleContainer>
|
||||||
<exampleContainer>
|
<exampleContainer>
|
||||||
<multiwordExample lexical_unit_id="19" type="collocation" audio="audio-56643">
|
<multiwordExample lexical_unit_id="19" type="collocation" audio="audio-56643">
|
||||||
|
@ -84,15 +86,19 @@
|
||||||
<comp role="collocate" sloleks="LE_blahblah">ogledati</comp>
|
<comp role="collocate" sloleks="LE_blahblah">ogledati</comp>
|
||||||
<comp role="headword">hiša</comp>
|
<comp role="headword">hiša</comp>
|
||||||
</multiwordExample>
|
</multiwordExample>
|
||||||
|
<translationContainerList>
|
||||||
<translationContainer>
|
<translationContainer>
|
||||||
<translation targetLang="hun" source="source2" audio="audio-blah-77242">blah blah2</translation>
|
<translation targetLang="hun" source="source2" audio="audio-blah-77242">blah blah2</translation>
|
||||||
</translationContainer>
|
</translationContainer>
|
||||||
|
</translationContainerList>
|
||||||
</exampleContainer>
|
</exampleContainer>
|
||||||
<exampleContainer>
|
<exampleContainer>
|
||||||
<corpusExample example_id="552439" modified="false" lexical_unit_id="19"><comp role="collocate">Ogledali</comp> smo <comp role="collocate">si</comp> <comp role="headword">hišo</comp>.</corpusExample>
|
<corpusExample example_id="552439" modified="false" lexical_unit_id="19"><comp role="collocate">Ogledali</comp> smo <comp role="collocate">si</comp> <comp role="headword">hišo</comp>.</corpusExample>
|
||||||
|
<translationContainerList>
|
||||||
<translationContainer>
|
<translationContainer>
|
||||||
<translation targetLang="hun" source="source2">Blah blah blah 3.</translation>
|
<translation targetLang="hun" source="source2">Blah blah blah 3.</translation>
|
||||||
</translationContainer>
|
</translationContainer>
|
||||||
|
</translationContainerList>
|
||||||
</exampleContainer>
|
</exampleContainer>
|
||||||
</exampleContainerList>
|
</exampleContainerList>
|
||||||
</sense>
|
</sense>
|
||||||
|
|
|
@ -1,84 +0,0 @@
|
||||||
<dictionary>
|
|
||||||
<entry>
|
|
||||||
<head>
|
|
||||||
<headword>
|
|
||||||
<lemma>absorbirati</lemma>
|
|
||||||
</headword>
|
|
||||||
<lexicalUnit id="294" type="single">
|
|
||||||
<lexeme>absorbirati</lexeme>
|
|
||||||
</lexicalUnit>
|
|
||||||
<grammar>
|
|
||||||
<category>glagol</category>
|
|
||||||
</grammar>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="Gigafida 1.0">3295</measure>
|
|
||||||
</measureList>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<senseList>
|
|
||||||
<sense id="1">
|
|
||||||
<syntacticStructureList>
|
|
||||||
<syntacticStructure id="34" name="gbz_pp_sbz2" pp="brez">
|
|
||||||
<exampleContainerList>
|
|
||||||
<exampleContainer>
|
|
||||||
<multiwordExample type="collocation" cluster="1" frequency="5" logDice="2.23">
|
|
||||||
<comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
|
|
||||||
<comp num="2" role="other" sloleks="LE_4485e634af6a4b16093fe3140632418a">brez</comp>
|
|
||||||
<comp num="3" role="collocate" sloleks="LE_b66fc194167f71cb086b3c0b80820711">težave</comp>
|
|
||||||
</multiwordExample>
|
|
||||||
</exampleContainer>
|
|
||||||
<exampleContainer>
|
|
||||||
<multiwordExample type="collocation" cluster="1" frequency="14" logDice="11.25">
|
|
||||||
<comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
|
|
||||||
<comp num="2" role="other" sloleks="LE_ba405c6d2b3a4b0cf9f6216af3812c6b">brez</comp>
|
|
||||||
<comp num="3" role="collocate" sloleks="LE_274a6fc240c9e3c4c2dc3e4804a0c11f">zahteve</comp>
|
|
||||||
</multiwordExample>
|
|
||||||
</exampleContainer>
|
|
||||||
</exampleContainerList>
|
|
||||||
</syntacticStructure>
|
|
||||||
<syntacticStructure id="34" name="gbz_pp_sbz2" pp="iz">
|
|
||||||
<exampleContainerList>
|
|
||||||
<exampleContainer>
|
|
||||||
<multiwordExample type="collocation" cluster="2" frequency="14" logDice="11.25">
|
|
||||||
<comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
|
|
||||||
<comp num="2" role="other" sloleks="LE_ba405c6d2b3a4b0cf9f6216af3812c6b">iz</comp>
|
|
||||||
<comp num="3" role="collocate" sloleks="LE_274a6fc240c9e3c4c2dc3e4804a0c11f">prebavila</comp>
|
|
||||||
</multiwordExample>
|
|
||||||
</exampleContainer>
|
|
||||||
<exampleContainer>
|
|
||||||
<multiwordExample type="collocation" cluster="2" frequency="14" logDice="11.25">
|
|
||||||
<comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
|
|
||||||
<comp num="2" role="other" sloleks="LE_ba405c6d2b3a4b0cf9f6216af3812c6b">iz</comp>
|
|
||||||
<comp num="3" role="collocate" sloleks="LE_274a6fc240c9e3c4c2dc3e4804a0c11f">želodca</comp>
|
|
||||||
</multiwordExample>
|
|
||||||
</exampleContainer>
|
|
||||||
</exampleContainerList>
|
|
||||||
</syntacticStructure>
|
|
||||||
</syntacticStructureList>
|
|
||||||
</sense>
|
|
||||||
<sense id="2">
|
|
||||||
<syntacticStructureList>
|
|
||||||
<syntacticStructure id="34" name="gbz_pp_sbz2" pp="brez">
|
|
||||||
<exampleContainerList>
|
|
||||||
<exampleContainer>
|
|
||||||
<multiwordExample type="collocation" cluster="1" frequency="5" logDice="2.23">
|
|
||||||
<comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
|
|
||||||
<comp num="2" role="other" sloleks="LE_4485e634af6a4b16093fe3140632418a">brez</comp>
|
|
||||||
<comp num="3" role="collocate" sloleks="LE_b66fc194167f71cb086b3c0b80820711">težave</comp>
|
|
||||||
</multiwordExample>
|
|
||||||
</exampleContainer>
|
|
||||||
<exampleContainer>
|
|
||||||
<multiwordExample type="collocation" cluster="2" frequency="14" logDice="11.25">
|
|
||||||
<comp num="1" role="headword" sloleks="LE_a293f9ab871299f116dff2cc1421367a">absorbirati</comp>
|
|
||||||
<comp num="2" role="other" sloleks="LE_ba405c6d2b3a4b0cf9f6216af3812c6b">brez</comp>
|
|
||||||
<comp num="3" role="collocate" sloleks="LE_274a6fc240c9e3c4c2dc3e4804a0c11f">prebavila</comp>
|
|
||||||
</multiwordExample>
|
|
||||||
</exampleContainer>
|
|
||||||
</exampleContainerList>
|
|
||||||
</syntacticStructure>
|
|
||||||
</syntacticStructureList>
|
|
||||||
</sense>
|
|
||||||
</senseList>
|
|
||||||
</body>
|
|
||||||
</entry>
|
|
||||||
</dictionary>
|
|
257
examples/monolingual_dictionaries/structure_pipeline_output.xml
Normal file
257
examples/monolingual_dictionaries/structure_pipeline_output.xml
Normal file
|
@ -0,0 +1,257 @@
|
||||||
|
<dictionary>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>obljubiti večno zvestobo komu</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="138">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="obljubiti" msd="Ggdn">obljubiti</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="večen" msd="Ppnzet">večno</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="zvestoba" msd="Sozet">zvestobo</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="kdo" msd="Zv-med">komu</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>kovati koga/kaj v nebo</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="183">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="kovati" msd="Ggnn">kovati</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="kdo" msd="Zv-met">koga</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme msd="U">/</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="kaj" msd="Zv-sei">kaj</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="5">
|
||||||
|
<lexeme lemma="v" msd="Dt">v</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="6">
|
||||||
|
<lexeme lemma="nebo" msd="Soset">nebo</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>kaj v malem</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="674">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="kaj" msd="Zv-sei">kaj</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="v" msd="Dm">v</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="mali" msd="Ppnsem">malem</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>počutiti se kot riba na suhem</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="773">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="počutiti" msd="Ggnn">počutiti</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="se" msd="Zp------k">se</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="kot" msd="Vd">kot</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="riba" msd="Sozei">riba</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="5">
|
||||||
|
<lexeme lemma="na" msd="Dm">na</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="6">
|
||||||
|
<lexeme lemma="suh" msd="Ppnsem">suhem</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>jemati koga/kaj pod drobnogled</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="183">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="jemati" msd="Ggnn">jemati</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="kdo" msd="Zv-met">koga</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme msd="U">/</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="kaj" msd="Zv-set">kaj</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="5">
|
||||||
|
<lexeme lemma="pod" msd="Dt">pod</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="6">
|
||||||
|
<lexeme lemma="drobnogled" msd="Sometn">drobnogled</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>na slepo</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="892">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="na" msd="Dt">na</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="slep" msd="Ppnset">slepo</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>bati se za svojo glavo</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="736">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="bati" msd="Ggnn">bati</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="se" msd="Zp------k">se</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="za" msd="Dt">za</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="svoj" msd="Zp-zet">svojo</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="5">
|
||||||
|
<lexeme lemma="glava" msd="Sozet">glavo</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>luč na koncu tunela</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="730">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="luč" msd="Sozei">luč</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="na" msd="Dm">na</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="konec" msd="Somem">koncu</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="tunel" msd="Somer">tunela</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>skrajni čas je, da</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="1545">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="skrajen" msd="Ppnmeid">skrajni</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="čas" msd="Somei">čas</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="biti" msd="Gp-ste-n">je</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme msd="U">,</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="5">
|
||||||
|
<lexeme lemma="da" msd="Vd">da</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>zardeti kot kuhan rak</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit type="MWE" structure_id="151">
|
||||||
|
<component num="1">
|
||||||
|
<lexeme lemma="zardeti" msd="Ggdn">zardeti</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="2">
|
||||||
|
<lexeme lemma="kot" msd="Vd">kot</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="3">
|
||||||
|
<lexeme lemma="kuhan" msd="Pdnmein">kuhan</lexeme>
|
||||||
|
</component>
|
||||||
|
<component num="4">
|
||||||
|
<lexeme lemma="rak" msd="Somei">rak</lexeme>
|
||||||
|
</component>
|
||||||
|
</lexicalUnit>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<senseList/>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</dictionary>
|
86
examples/morphological_lexicon/Amanda.xml
Normal file
86
examples/morphological_lexicon/Amanda.xml
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
<lexicon>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma orthography_ids="342" dynamic_accentuation_ids="81" pronunciation_ids="32|33" hyphenation_ids="7" tonemic_accentuation_ids="85">Amanda</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_Amanda" type="single">
|
||||||
|
<lexeme lexical_unit_lexeme_id="83224">Amanda</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>samostalnik</category>
|
||||||
|
<subcategory type="pronunciation">ambiguous</subcategory>
|
||||||
|
<grammarFeature name="type">proper</grammarFeature>
|
||||||
|
<grammarFeature name="gender">feminine</grammarFeature>
|
||||||
|
</grammar>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">285818</measure>
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<wordFormList>
|
||||||
|
<wordForm id="333330">
|
||||||
|
<msd language="sl" system="JOS">Slzei</msd>
|
||||||
|
<msd language="en" system="UD">PROPN|...</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="number">singular</grammarFeature>
|
||||||
|
<grammarFeature name="case">nominative</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentations>
|
||||||
|
<orthographyList>
|
||||||
|
<orthography id="342" morphologyPatterns="Sz.1.2.0">
|
||||||
|
<form>Amanda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">813</measure>
|
||||||
|
<measure type="frequency" source="GOS 1.0">432</measure>
|
||||||
|
</measureList>
|
||||||
|
</orthography>
|
||||||
|
</orthographyList>
|
||||||
|
<accentuationList type="dynamic">
|
||||||
|
<accentuation id="81" orthography_id="342" accentuationPatterns="cjvt:AP.1|zrc:S202b">
|
||||||
|
<form>Amánda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">77</measure>
|
||||||
|
</measureList>
|
||||||
|
</accentuation>
|
||||||
|
<accentuation id="82" orthography_id="342" accentuationPatterns="cjvt:no_accentuation|zrc:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<accentuationList type="tonemic">
|
||||||
|
<accentuation id="85" orthography_id="342" accentuationPatterns="zrc:S202b|zrc:S202i">
|
||||||
|
<form>Amànda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</accentuation>
|
||||||
|
</accentuationList>
|
||||||
|
<pronunciationList>
|
||||||
|
<pronunciation id="32" dynamic_accentuation_id="115" type="slovene">
|
||||||
|
<form script="IPA">aˈmaːnda</form>
|
||||||
|
<form script="SAMPA">a"ma:nda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
<pronunciation id="33" dynamic_accentuation_id="82" type="english">
|
||||||
|
<form script="IPA">əˈmɛːnda</form>
|
||||||
|
<form script="SAMPA">ə"mE:nda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
</pronunciationList>
|
||||||
|
<hyphenationList>
|
||||||
|
<hyphenation id="7" orthography_id="342">
|
||||||
|
<form>A|man|da</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</hyphenation>
|
||||||
|
</hyphenationList>
|
||||||
|
</formRepresentations>
|
||||||
|
</wordForm>
|
||||||
|
</wordFormList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</lexicon>
|
75
examples/morphological_lexicon/PCR.xml
Normal file
75
examples/morphological_lexicon/PCR.xml
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
<lexicon>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma orthography_ids="342" dynamic_accentuation_ids="81" pronunciation_ids="32|33|34" hyphenation_ids="" tonemic_accentuation_ids="85">PCR</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_PCR" type="single">
|
||||||
|
<lexeme lexical_unit_lexeme_id="83224">PCR</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>samostalnik</category>
|
||||||
|
<subcategory type="pronunciation">acronym</subcategory>
|
||||||
|
<grammarFeature name="type">common</grammarFeature>
|
||||||
|
<grammarFeature name="gender">masculine</grammarFeature>
|
||||||
|
</grammar>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">285818</measure>
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<wordFormList>
|
||||||
|
<wordForm id="333330">
|
||||||
|
<msd language="sl" system="JOS">Somei</msd>
|
||||||
|
<msd language="en" system="UD">NOUN|...</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="number">singular</grammarFeature>
|
||||||
|
<grammarFeature name="case">nominative</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentations>
|
||||||
|
<orthographyList>
|
||||||
|
<orthography id="342" morphologyPatterns="Sm.5.1.o">
|
||||||
|
<form>PCR</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">813</measure>
|
||||||
|
<measure type="frequency" source="GOS 1.0">432</measure>
|
||||||
|
</measureList>
|
||||||
|
</orthography>
|
||||||
|
</orthographyList>
|
||||||
|
<accentuationList type="dynamic">
|
||||||
|
<accentuation id="81" orthography_id="342" accentuationPatterns="cjvt:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<accentuationList type="tonemic">
|
||||||
|
<accentuation id="85" orthography_id="342" accentuationPatterns="zrc:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<pronunciationList>
|
||||||
|
<pronunciation id="32" dynamic_accentuation_id="81" type="slovene_spelling_short" status="preferred">
|
||||||
|
<form script="IPA">ˈpəˈtsəˈrə</form>
|
||||||
|
<form script="SAMPA">"p@"ts@"r@</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
<pronunciation id="33" dynamic_accentuation_id="81" type="slovene_spelling_long">
|
||||||
|
<form script="IPA">ˈpeːˈʦeːˈɛɾ</form>
|
||||||
|
<form script="SAMPA">"pe:"tse:"E:r</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
<pronunciation id="34" dynamic_accentuation_id="81" type="english_spelling">
|
||||||
|
<form script="IPA">ˈpiːˈsiːˈaːɾ</form>
|
||||||
|
<form script="SAMPA">"pi:"si:"a:r</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
</pronunciationList>
|
||||||
|
<hyphenationList/>
|
||||||
|
</formRepresentations>
|
||||||
|
</wordForm>
|
||||||
|
</wordFormList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</lexicon>
|
70
examples/morphological_lexicon/Shakespeare.xml
Normal file
70
examples/morphological_lexicon/Shakespeare.xml
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
<lexicon>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma orthography_ids="342" dynamic_accentuation_ids="81" pronunciation_ids="32|33" hyphenation_ids="" tonemic_accentuation_ids="85">Shakespeare</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_Shakespeare" type="single">
|
||||||
|
<lexeme lexical_unit_lexeme_id="83224">Shakespeare</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>samostalnik</category>
|
||||||
|
<subcategory type="pronunciation">other</subcategory>
|
||||||
|
<grammarFeature name="type">proper</grammarFeature>
|
||||||
|
<grammarFeature name="gender">masculine</grammarFeature>
|
||||||
|
</grammar>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">285818</measure>
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<wordFormList>
|
||||||
|
<wordForm id="333330">
|
||||||
|
<msd language="sl" system="JOS">Slmei</msd>
|
||||||
|
<msd language="en" system="UD">PROPN|...</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="number">singular</grammarFeature>
|
||||||
|
<grammarFeature name="case">nominative</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentations>
|
||||||
|
<orthographyList>
|
||||||
|
<orthography id="342" morphologyPatterns="Sm.5.1.o">
|
||||||
|
<form>Shakespeare</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">813</measure>
|
||||||
|
<measure type="frequency" source="GOS 1.0">432</measure>
|
||||||
|
</measureList>
|
||||||
|
</orthography>
|
||||||
|
</orthographyList>
|
||||||
|
<accentuationList type="dynamic">
|
||||||
|
<accentuation id="81" orthography_id="342" accentuationPatterns="cjvt:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<accentuationList type="tonemic">
|
||||||
|
<accentuation id="82" orthography_id="342" accentuationPatterns="cjvt:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<pronunciationList>
|
||||||
|
<pronunciation id="32" dynamic_accentuation_id="81" status="preferred">
|
||||||
|
<form script="IPA">ˈʃɛːɪkspiɾ</form>
|
||||||
|
<form script="SAMPA">"SE:Ikspir</form>
|
||||||
|
<form script="slovene_phonetization">šêjkspir</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
<pronunciation id="33" dynamic_accentuation_id="81">
|
||||||
|
<form script="IPA">ˈʃeːkspiɾ</form>
|
||||||
|
<form script="SAMPA">"Se:kspir</form>
|
||||||
|
<form script="slovene_phonetization">šékspir</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
</pronunciationList>
|
||||||
|
<hyphenationList/>
|
||||||
|
</formRepresentations>
|
||||||
|
</wordForm>
|
||||||
|
</wordFormList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</lexicon>
|
2629
examples/morphological_lexicon/clarin_sample.xml
Normal file
2629
examples/morphological_lexicon/clarin_sample.xml
Normal file
File diff suppressed because it is too large
Load Diff
84
examples/morphological_lexicon/gostja.xml
Normal file
84
examples/morphological_lexicon/gostja.xml
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
<lexicon>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma orthography_ids="342" dynamic_accentuation_ids="81" pronunciation_ids="32" hyphenation_ids="7|8" tonemic_accentuation_ids="85">gostja</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_gostja" type="single">
|
||||||
|
<lexeme lexical_unit_lexeme_id="83224">gostja</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>samostalnik</category>
|
||||||
|
<subcategory type="pronunciation">slovene</subcategory>
|
||||||
|
<grammarFeature name="type">common</grammarFeature>
|
||||||
|
<grammarFeature name="gender">feminine</grammarFeature>
|
||||||
|
</grammar>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">285818</measure>
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<wordFormList>
|
||||||
|
<wordForm id="333330">
|
||||||
|
<msd language="sl" system="JOS">Sozei</msd>
|
||||||
|
<msd language="en" system="UD">NOUN|...</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="number">singular</grammarFeature>
|
||||||
|
<grammarFeature name="case">nominative</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentations>
|
||||||
|
<orthographyList>
|
||||||
|
<orthography id="342" morphologyPatterns="Sz.1.2.0">
|
||||||
|
<form>gostja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">813</measure>
|
||||||
|
<measure type="frequency" source="GOS 1.0">432</measure>
|
||||||
|
</measureList>
|
||||||
|
</orthography>
|
||||||
|
</orthographyList>
|
||||||
|
<accentuationList type="dynamic">
|
||||||
|
<accentuation id="81" orthography_id="342" accentuationPatterns="cjvt:AP.1|zrc:S202b" status="preferred">
|
||||||
|
<form>gôstja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">77</measure>
|
||||||
|
</measureList>
|
||||||
|
</accentuation>
|
||||||
|
</accentuationList>
|
||||||
|
<accentuationList type="tonemic">
|
||||||
|
<accentuation id="85" orthography_id="342" accentuationPatterns="zrc:S202b|zrc:S202i" status="preferred">
|
||||||
|
<form>góstja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</accentuation>
|
||||||
|
</accentuationList>
|
||||||
|
<pronunciationList>
|
||||||
|
<pronunciation id="32" dynamic_accentuation_id="81" tonemic_accentuation_id="85">
|
||||||
|
<form script="IPA">ˈɡo:stja</form>
|
||||||
|
<form script="SAMPA">"gO:stja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
</pronunciationList>
|
||||||
|
<hyphenationList>
|
||||||
|
<hyphenation id="7" orthography_id="342" status="preferred">
|
||||||
|
<form>go|stja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</hyphenation>
|
||||||
|
<hyphenation id="8" orthography_id="342">
|
||||||
|
<form>gos|tja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</hyphenation>
|
||||||
|
</hyphenationList>
|
||||||
|
</formRepresentations>
|
||||||
|
</wordForm>
|
||||||
|
</wordFormList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</lexicon>
|
440
examples/semantic_types/semantic_types_sample.xml
Normal file
440
examples/semantic_types/semantic_types_sample.xml
Normal file
|
@ -0,0 +1,440 @@
|
||||||
|
<semanticTypes>
|
||||||
|
<semanticType SEMCODE="20">
|
||||||
|
<name sl="AKTIVNOST">ACT</name>
|
||||||
|
<semanticType SEMCODE="20.1">
|
||||||
|
<name sl="DEJANJE">ACTION</name>
|
||||||
|
<semanticType SEMCODE="20.1.2">
|
||||||
|
<name sl="čustveno">emotional</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.16">
|
||||||
|
<name sl="dogodek_človeški">event_human</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.14">
|
||||||
|
<name sl="družbeno">social</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.8">
|
||||||
|
<name sl="gibanje">movement</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.3">
|
||||||
|
<name sl="kognicija">cognition</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.4">
|
||||||
|
<name sl="komunikacija">communication</name>
|
||||||
|
<semanticType SEMCODE="20.1.4.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.4.2">
|
||||||
|
<name sl="nečloveška">nonhuman</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.4.3">
|
||||||
|
<name sl="pisna_govorna">written_spoken</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.4.4">
|
||||||
|
<name sl="telesna">physical</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.12">
|
||||||
|
<name sl="lastnina">property</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.13">
|
||||||
|
<name sl="medicinski_postopek">medical_procedure</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.5">
|
||||||
|
<name sl="percepcija">perception</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.18">
|
||||||
|
<name sl="pojav">phenomenon</name>
|
||||||
|
<semanticType SEMCODE="20.1.18.1">
|
||||||
|
<name sl="ostalo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.18.2">
|
||||||
|
<name sl="zvočni">sound</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.6">
|
||||||
|
<name sl="sprememba">change</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.17">
|
||||||
|
<name sl="stanje">state</name>
|
||||||
|
<semanticType SEMCODE="20.1.17.1">
|
||||||
|
<name sl="fazno">phase</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.17.2">
|
||||||
|
<name sl="modalno">modal</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.7">
|
||||||
|
<name sl="stik">contact</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.15">
|
||||||
|
<name sl="tekmovanje">contest</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.9">
|
||||||
|
<name sl="telesno">physical</name>
|
||||||
|
<semanticType SEMCODE="20.1.9.1">
|
||||||
|
<name sl="nega">care</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.9.2">
|
||||||
|
<name sl="vitalno">vital</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.10">
|
||||||
|
<name sl="ustvarjanje">creation</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.11">
|
||||||
|
<name sl="zaužitje">consumption</name>
|
||||||
|
<semanticType SEMCODE="20.1.11.1">
|
||||||
|
<name sl="hrana_kuha">food_cooking</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.11.2">
|
||||||
|
<name sl="hrana_pridelava">food_production</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.1.11.3">
|
||||||
|
<name sl="hrana_uživanje">food_consumption</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.2">
|
||||||
|
<name sl="DEJAVNOST">ACTIVITY</name>
|
||||||
|
<semanticType SEMCODE="20.2.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.2.2">
|
||||||
|
<name sl="gospodarska">industrial</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.2.3">
|
||||||
|
<name sl="negospodarska">nonindustrial</name>
|
||||||
|
<semanticType SEMCODE="20.2.3.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.2.3.2">
|
||||||
|
<name sl="igra">game</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.2.3.3">
|
||||||
|
<name sl="ples">dance</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="20.2.3.4">
|
||||||
|
<name sl="šport">sport</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9">
|
||||||
|
<name sl="ARTEFAKT">ARTEFACT</name>
|
||||||
|
<semanticType SEMCODE="9.2">
|
||||||
|
<name sl="denar">money</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.3">
|
||||||
|
<name sl="dokument">document</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.4">
|
||||||
|
<name sl="glasbilo">instrument</name>
|
||||||
|
<semanticType SEMCODE="9.4.2">
|
||||||
|
<name sl="brenkalo">string_plucking</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.4.8">
|
||||||
|
<name sl="del_glasbila">part</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.4.3">
|
||||||
|
<name sl="godalo">string_bowing</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.4.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.4.4">
|
||||||
|
<name sl="pihalo">wind</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.4.9">
|
||||||
|
<name sl="pripomoček">tool</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.4.5">
|
||||||
|
<name sl="tipke">keyboard</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.4.6">
|
||||||
|
<name sl="tolkalo">percussion</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.4.7">
|
||||||
|
<name sl="trobilo">brass</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.5">
|
||||||
|
<name sl="komunikacija">communication</name>
|
||||||
|
<semanticType SEMCODE="9.5.1">
|
||||||
|
<name sl="IKT">ICT</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.5.2">
|
||||||
|
<name sl="znak_simbol">sign_symbol</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6">
|
||||||
|
<name sl="naprava_pripomoček">device_apparatus</name>
|
||||||
|
<semanticType SEMCODE="9.6.1">
|
||||||
|
<name sl="naprava">device</name>
|
||||||
|
<semanticType SEMCODE="9.6.1.2">
|
||||||
|
<name sl="elektro_element">electrical_element</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.1.3">
|
||||||
|
<name sl="hišna">domestic</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.1.4">
|
||||||
|
<name sl="komunikacijska">communication</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.1.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.1.5">
|
||||||
|
<name sl="računalniška">computer</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.1.6">
|
||||||
|
<name sl="signalna">signalling</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.1.7">
|
||||||
|
<name sl="svetilna">light</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.1.8">
|
||||||
|
<name sl="zabavna">entertainment</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.2">
|
||||||
|
<name sl="orodje">tool</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.3">
|
||||||
|
<name sl="pripomoček">apparatus</name>
|
||||||
|
<semanticType SEMCODE="9.6.3.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.3.2">
|
||||||
|
<name sl="igrača">toy</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.3.3">
|
||||||
|
<name sl="kuhinjski">kitchen</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.3.4">
|
||||||
|
<name sl="merilni">measuring</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.3.5">
|
||||||
|
<name sl="pisarniški">office</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.3.6">
|
||||||
|
<name sl="svetilni">light</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.3.7">
|
||||||
|
<name sl="športni">sport</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.6.4">
|
||||||
|
<name sl="stroj">machine</name>
|
||||||
|
<semanticType SEMCODE="9.6.4.1">
|
||||||
|
<name sl="del_stroja">part</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.7">
|
||||||
|
<name sl="oblačilo">apparel</name>
|
||||||
|
<semanticType SEMCODE="9.7.1">
|
||||||
|
<name sl="dodatek">accessory</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.7.2">
|
||||||
|
<name sl="nakit">jewellery</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.7.3">
|
||||||
|
<name sl="oblačilo">clothes</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.7.4">
|
||||||
|
<name sl="obutev">footgear</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.7.5">
|
||||||
|
<name sl="pokrivalo">headgear</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.8">
|
||||||
|
<name sl="orožje">weapon</name>
|
||||||
|
<semanticType SEMCODE="9.8.2">
|
||||||
|
<name sl="municija">ammunition</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.8.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.8.3">
|
||||||
|
<name sl="ročno">hand</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.8.4">
|
||||||
|
<name sl="vojaška_naprava">military_device</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.9">
|
||||||
|
<name sl="pohištvo_oprema">furniture_equipment</name>
|
||||||
|
<semanticType SEMCODE="9.9.1">
|
||||||
|
<name sl="del_opreme">part</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.10">
|
||||||
|
<name sl="posoda_shranjevanje">container_vessel</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.11">
|
||||||
|
<name sl="snov">substance</name>
|
||||||
|
<semanticType SEMCODE="9.11.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.11.2">
|
||||||
|
<name sl="farmacevtska">pharmaceutical</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.11.3">
|
||||||
|
<name sl="osebna_nega">body_care</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.12">
|
||||||
|
<name sl="tekstil">fabric</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.13">
|
||||||
|
<name sl="umetniški_izdelek">artwork</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.14">
|
||||||
|
<name sl="vozilo">vehicle</name>
|
||||||
|
<semanticType SEMCODE="9.14.1">
|
||||||
|
<name sl="kopensko">land</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.14.2">
|
||||||
|
<name sl="vesoljsko">space</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.14.3">
|
||||||
|
<name sl="vodno">water</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.14.4">
|
||||||
|
<name sl="zračno">air</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15">
|
||||||
|
<name sl="zgradba">building</name>
|
||||||
|
<semanticType SEMCODE="9.15.2">
|
||||||
|
<name sl="bivališče">dwelling</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15.3">
|
||||||
|
<name sl="del_zgradbe">part_of_building</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15.4">
|
||||||
|
<name sl="delovišče">worksite</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15.5">
|
||||||
|
<name sl="hramba">storage</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15.6">
|
||||||
|
<name sl="infrastruktura">infrastructure</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15.7">
|
||||||
|
<name sl="javnopolitična">public_political</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15.8">
|
||||||
|
<name sl="kulturna">cultural</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15.9">
|
||||||
|
<name sl="storitve">services</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="9.15.10">
|
||||||
|
<name sl="verska">religious</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="15">
|
||||||
|
<name sl="ČAS">TIME</name>
|
||||||
|
<semanticType SEMCODE="15.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="15.2">
|
||||||
|
<name sl="enota">unit</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="15.3">
|
||||||
|
<name sl="obdobje">period</name>
|
||||||
|
<semanticType SEMCODE="15.3.1">
|
||||||
|
<name sl="splošno">general</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="15.3.2">
|
||||||
|
<name sl="zgodovinsko">historic</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="15.4">
|
||||||
|
<name sl="trenutek">moment</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1">
|
||||||
|
<name sl="ČLOVEK">HUMAN</name>
|
||||||
|
<semanticType SEMCODE="1.1">
|
||||||
|
<name sl="aktivnost">activity</name>
|
||||||
|
<semanticType SEMCODE="1.1.2">
|
||||||
|
<name sl="funkcija">function</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.1.3">
|
||||||
|
<name sl="nosilec">agent</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.1.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.1.4">
|
||||||
|
<name sl="poklic">profession</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.1.5">
|
||||||
|
<name sl="poklic_nosilec">profession_agent</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.2">
|
||||||
|
<name sl="lastnost">characteristic</name>
|
||||||
|
<semanticType SEMCODE="1.2.8">
|
||||||
|
<name sl="geografska">geographical</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.2.4">
|
||||||
|
<name sl="nazor">belief</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.2.6">
|
||||||
|
<name sl="nesorodstvena">nonkin</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.2.1">
|
||||||
|
<name sl="drugo">other</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.2.7">
|
||||||
|
<name sl="pripadnost">member</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.2.5">
|
||||||
|
<name sl="sorodstvena">kin</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.2.9">
|
||||||
|
<name sl="status">status</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.2.2">
|
||||||
|
<name sl="telesna">physical</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.2.3">
|
||||||
|
<name sl="umska_mentalna_vedenjska">intellectual_mental_behavioural</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.3">
|
||||||
|
<name sl="mitologija">mythology</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.4">
|
||||||
|
<name sl="naziv">name</name>
|
||||||
|
<semanticType SEMCODE="1.4.3">
|
||||||
|
<name sl="akademski">academic</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.4.2">
|
||||||
|
<name sl="ime">personal</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.4.4">
|
||||||
|
<name sl="naslavljalni">title</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.4.5">
|
||||||
|
<name sl="plemiški">noble</name>
|
||||||
|
</semanticType>
|
||||||
|
<semanticType SEMCODE="1.4.1">
|
||||||
|
<name sl="vzdevek">nickname</name>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
</semanticType>
|
||||||
|
</semanticTypes>
|
4276
examples/structures/structures_sample.xml
Normal file
4276
examples/structures/structures_sample.xml
Normal file
File diff suppressed because it is too large
Load Diff
221
examples/valency_lexicon/VS10_zvonkljati_sloleks22.xml
Executable file
221
examples/valency_lexicon/VS10_zvonkljati_sloleks22.xml
Executable file
|
@ -0,0 +1,221 @@
|
||||||
|
<dictionary xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="valency_lexicon.xsd">
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma>zvonkljati</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit>
|
||||||
|
<lexeme>zvonkljati</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>glagol</category>
|
||||||
|
</grammar>
|
||||||
|
<measureList />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<statisticsContainerList>
|
||||||
|
<statisticsContainer>
|
||||||
|
<semanticRole>ACT</semanticRole>
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_pattern_ratio">0.4000</measure>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_sentence_ratio">0.4000</measure>
|
||||||
|
</measureList>
|
||||||
|
</statisticsContainer>
|
||||||
|
<statisticsContainer>
|
||||||
|
<semanticRole>REC</semanticRole>
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_pattern_ratio">0.4000</measure>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_sentence_ratio">0.4000</measure>
|
||||||
|
</measureList>
|
||||||
|
</statisticsContainer>
|
||||||
|
<statisticsContainer>
|
||||||
|
<semanticRole>CAUSE</semanticRole>
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_pattern_ratio">0.2000</measure>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_sentence_ratio">0.2000</measure>
|
||||||
|
</measureList>
|
||||||
|
</statisticsContainer>
|
||||||
|
<statisticsContainer>
|
||||||
|
<semanticRole>MANN</semanticRole>
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_pattern_ratio">0.2000</measure>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_sentence_ratio">0.2000</measure>
|
||||||
|
</measureList>
|
||||||
|
</statisticsContainer>
|
||||||
|
<statisticsContainer>
|
||||||
|
<semanticRole>LOC</semanticRole>
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_pattern_ratio">0.6000</measure>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_sentence_ratio">0.6000</measure>
|
||||||
|
</measureList>
|
||||||
|
</statisticsContainer>
|
||||||
|
<statisticsContainer>
|
||||||
|
<semanticRole>COND</semanticRole>
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_pattern_ratio">0.2000</measure>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_sentence_ratio">0.2000</measure>
|
||||||
|
</measureList>
|
||||||
|
</statisticsContainer>
|
||||||
|
<statisticsContainer>
|
||||||
|
<semanticRole>DUR</semanticRole>
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_pattern_ratio">0.2000</measure>
|
||||||
|
<measure source="Gigafida 2.0" type="valency_sentence_ratio">0.2000</measure>
|
||||||
|
</measureList>
|
||||||
|
</statisticsContainer>
|
||||||
|
</statisticsContainerList>
|
||||||
|
<senseList>
|
||||||
|
<sense>
|
||||||
|
<definitionList />
|
||||||
|
<valencyPatternList system="JOS">
|
||||||
|
<valencyPattern id="374">
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="frequency_all">1</measure>
|
||||||
|
</measureList>
|
||||||
|
<semanticRoleContainerList>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>ACT</semanticRole>
|
||||||
|
<syntacticStructureList>
|
||||||
|
<syntacticStructure id="70" />
|
||||||
|
</syntacticStructureList>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>REC</semanticRole>
|
||||||
|
<syntacticStructureList>
|
||||||
|
<syntacticStructure id="22" />
|
||||||
|
</syntacticStructureList>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>CAUSE</semanticRole>
|
||||||
|
<syntacticStructureList>
|
||||||
|
<syntacticStructure id="13">
|
||||||
|
<component num="2">
|
||||||
|
<lexeme sloleks="274">z</lexeme>
|
||||||
|
</component>
|
||||||
|
</syntacticStructure>
|
||||||
|
</syntacticStructureList>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
</semanticRoleContainerList>
|
||||||
|
<patternRepresentation>KDO/KAJ zvonkljati KOMU/ČEMU ZAKAJ</patternRepresentation>
|
||||||
|
<exampleContainerList>
|
||||||
|
<exampleContainer>
|
||||||
|
<corpusExample corpusName="Gigafida 2.0" exampleId="GF5800693.856.5"><tree role="CAUSE"><comp num="2" structure_id="13">Z</comp> <comp num="3" structure_id="13">baldahina</comp> na odru za orkester</tree> je <comp role="headword">zvonkljala</comp> <tree role="REC"><comp num="2" structure_id="22">školjki</comp></tree> <tree role="ACT">podobna <comp num="1" structure_id="70">skleda</comp> s progastimi belimi rebri</tree>.</corpusExample>
|
||||||
|
</exampleContainer>
|
||||||
|
</exampleContainerList>
|
||||||
|
</valencyPattern>
|
||||||
|
<valencyPattern id="6">
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="frequency_all">1</measure>
|
||||||
|
</measureList>
|
||||||
|
<semanticRoleContainerList>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>ACT</semanticRole>
|
||||||
|
<syntacticStructureList>
|
||||||
|
<syntacticStructure id="70" />
|
||||||
|
</syntacticStructureList>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>MANN</semanticRole>
|
||||||
|
<syntacticStructureList>
|
||||||
|
<syntacticStructure id="13">
|
||||||
|
<component num="2">
|
||||||
|
<lexeme sloleks="244">prek</lexeme>
|
||||||
|
</component>
|
||||||
|
</syntacticStructure>
|
||||||
|
</syntacticStructureList>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
</semanticRoleContainerList>
|
||||||
|
<patternRepresentation>KDO/KAJ zvonkljati KAKO</patternRepresentation>
|
||||||
|
<exampleContainerList>
|
||||||
|
<exampleContainer>
|
||||||
|
<corpusExample corpusName="Gigafida 2.0" exampleId="GF0437765.773.8">Rajanje je preplavilo deželo, <tree role="ACT">stari egipčanski, babilonski, azteški, inkovski <comp num="1" structure_id="70">izganjalci</comp> zime, kurenti in Juriji</tree> so <comp role="headword">zvonkljali</comp> <tree role="MANN"><comp num="2" structure_id="13">prek</comp> <comp num="3" structure_id="13">polj</comp></tree>.</corpusExample>
|
||||||
|
</exampleContainer>
|
||||||
|
</exampleContainerList>
|
||||||
|
</valencyPattern>
|
||||||
|
<valencyPattern id="62">
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="frequency_all">1</measure>
|
||||||
|
</measureList>
|
||||||
|
<semanticRoleContainerList>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>LOC</semanticRole>
|
||||||
|
<syntacticStructureList>
|
||||||
|
<syntacticStructure id="15">
|
||||||
|
<component num="2">
|
||||||
|
<lexeme sloleks="234">po</lexeme>
|
||||||
|
</component>
|
||||||
|
</syntacticStructure>
|
||||||
|
</syntacticStructureList>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>COND</semanticRole>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
</semanticRoleContainerList>
|
||||||
|
<patternRepresentation>zvonkljati KJE POD KATERIM POGOJEM</patternRepresentation>
|
||||||
|
<exampleContainerList>
|
||||||
|
<exampleContainer>
|
||||||
|
<corpusExample corpusName="Gigafida 2.0" exampleId="GF9947624.333.2">Tako so <comp role="headword">zvonkljali</comp> <tree role="LOC"><comp num="2" structure_id="15">po</comp> izolski <comp num="3" structure_id="15">rivi</comp></tree>, <tree role="COND">da so</tree> še <tree role="COND">ribe gledale iz morja</tree>!</corpusExample>
|
||||||
|
</exampleContainer>
|
||||||
|
</exampleContainerList>
|
||||||
|
</valencyPattern>
|
||||||
|
<valencyPattern id="391">
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="frequency_all">1</measure>
|
||||||
|
</measureList>
|
||||||
|
<semanticRoleContainerList>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>REC</semanticRole>
|
||||||
|
<syntacticStructureList>
|
||||||
|
<syntacticStructure id="22" />
|
||||||
|
</syntacticStructureList>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>LOC</semanticRole>
|
||||||
|
<syntacticStructureList>
|
||||||
|
<syntacticStructure id="15">
|
||||||
|
<component num="2">
|
||||||
|
<lexeme sloleks="261">v</lexeme>
|
||||||
|
</component>
|
||||||
|
</syntacticStructure>
|
||||||
|
</syntacticStructureList>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>DUR</semanticRole>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
</semanticRoleContainerList>
|
||||||
|
<patternRepresentation>zvonkljati KOMU/ČEMU KJE KOLIKO ČASA</patternRepresentation>
|
||||||
|
<exampleContainerList>
|
||||||
|
<exampleContainer>
|
||||||
|
<corpusExample corpusName="Gigafida 2.0" exampleId="GF1026195.985.2">Kje so tisti časi, ko se nihče ni razburjal, ker si je sloviti boksar Cassius Clay nadel ime Mohamed Ali in potem svoje nasprotnike še naprej pretepal, da <tree role="REC"><comp num="2" structure_id="22">jim</comp></tree> je še <tree role="DUR">mesece</tree> <comp role="headword">zvonkljalo</comp> <tree role="LOC"><comp num="2" structure_id="15">v</comp> <comp num="3" structure_id="15">glavah</comp></tree>, odtlej pod patronatom Alaha.</corpusExample>
|
||||||
|
</exampleContainer>
|
||||||
|
</exampleContainerList>
|
||||||
|
</valencyPattern>
|
||||||
|
<valencyPattern id="27">
|
||||||
|
<measureList>
|
||||||
|
<measure source="Gigafida 2.0" type="frequency_all">1</measure>
|
||||||
|
</measureList>
|
||||||
|
<semanticRoleContainerList>
|
||||||
|
<semanticRoleContainer>
|
||||||
|
<semanticRole>LOC</semanticRole>
|
||||||
|
<syntacticStructureList>
|
||||||
|
<syntacticStructure id="15">
|
||||||
|
<component num="2">
|
||||||
|
<lexeme sloleks="216">na</lexeme>
|
||||||
|
</component>
|
||||||
|
</syntacticStructure>
|
||||||
|
</syntacticStructureList>
|
||||||
|
</semanticRoleContainer>
|
||||||
|
</semanticRoleContainerList>
|
||||||
|
<patternRepresentation>zvonkljati KJE</patternRepresentation>
|
||||||
|
<exampleContainerList>
|
||||||
|
<exampleContainer>
|
||||||
|
<corpusExample corpusName="Gigafida 2.0" exampleId="GF4530676.1098.1">Šla bova tja, kjer sneg leži v planinah,šla bova tja visoko nad borovje,šla k čredi, ki <comp role="headword">zvonklja</comp> <tree role="LOC"><comp num="2" structure_id="15">na</comp> <comp num="3" structure_id="15">košeninah</comp></tree>,<tree role="LOC">kjer se zavija v mavrično trakovjeJungfrau</tree>, <tree role="LOC">ki zlato sonce ima pod glavo</tree>,<tree role="LOC">kjer bistri jelen šviga skoz meglice</tree>,<tree role="LOC">kjer na oblake</tree>, <tree role="LOC">ki leté v daljavo</tree>,<tree role="LOC">mečejo senco orlov perutnice</tree>.</corpusExample>
|
||||||
|
</exampleContainer>
|
||||||
|
</exampleContainerList>
|
||||||
|
</valencyPattern>
|
||||||
|
</valencyPatternList>
|
||||||
|
</sense>
|
||||||
|
</senseList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</dictionary>
|
1578
examples/valency_lexicon/abdicirati.xml
Normal file
1578
examples/valency_lexicon/abdicirati.xml
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -35,12 +35,14 @@
|
||||||
<xsd:element name="sense" minOccurs="0" maxOccurs="unbounded">
|
<xsd:element name="sense" minOccurs="0" maxOccurs="unbounded">
|
||||||
<xsd:complexType>
|
<xsd:complexType>
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
|
<xsd:element name="relatedSenseList" type="relatedSenseListType" minOccurs="0"/>
|
||||||
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
||||||
<xsd:element name="definitionList" type="definitionListType" minOccurs="0"/>
|
<xsd:element name="definitionList" type="definitionListType" minOccurs="0"/>
|
||||||
<xsd:element name="translationContainerList" type="translationContainerListType" minOccurs="0"/>
|
<xsd:element name="translationContainerList" type="translationContainerListType" minOccurs="0"/>
|
||||||
<xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0"/>
|
<xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0"/>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="id" type="xsd:int"/>
|
<xsd:attribute name="id" type="xsd:int"/>
|
||||||
|
<xsd:attribute name="dummy" type="xsd:boolean"/>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
</xsd:element>
|
</xsd:element>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
|
|
|
@ -20,9 +20,23 @@
|
||||||
<xsd:enumeration value="single"/>
|
<xsd:enumeration value="single"/>
|
||||||
<xsd:enumeration value="phrase"/>
|
<xsd:enumeration value="phrase"/>
|
||||||
<xsd:enumeration value="compound"/>
|
<xsd:enumeration value="compound"/>
|
||||||
|
<xsd:enumeration value="reflexive_verb"/>
|
||||||
</xsd:restriction>
|
</xsd:restriction>
|
||||||
</xsd:simpleType>
|
</xsd:simpleType>
|
||||||
</xsd:attribute>
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="orthography_ids" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="dynamic_accentuation_ids" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="tonemic_accentuation_ids" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="pronunciation_ids" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="hyphenation_ids" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="norm">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="non-standard"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
|
||||||
</xsd:extension>
|
</xsd:extension>
|
||||||
</xsd:simpleContent>
|
</xsd:simpleContent>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
@ -48,6 +62,13 @@
|
||||||
</xsd:simpleContent>
|
</xsd:simpleContent>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- List of grammatical features -->
|
||||||
|
<xsd:complexType name="grammarFeatureListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
<!-- Lemma-disambiguating feature, more open-ended than grammarFeatureType -->
|
<!-- Lemma-disambiguating feature, more open-ended than grammarFeatureType -->
|
||||||
<xsd:complexType name="homonymyFeatureType">
|
<xsd:complexType name="homonymyFeatureType">
|
||||||
<xsd:simpleContent>
|
<xsd:simpleContent>
|
||||||
|
@ -93,8 +114,10 @@
|
||||||
<xsd:element name="component" type="componentType" minOccurs="2" maxOccurs="unbounded"/>
|
<xsd:element name="component" type="componentType" minOccurs="2" maxOccurs="unbounded"/>
|
||||||
</xsd:choice>
|
</xsd:choice>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="id" type="xsd:int" use="required"/>
|
<xsd:attribute name="id" type="xsd:int"/>
|
||||||
<xsd:attribute name="type" use="required"> <!-- types of headwords allowed; hmm, probably should standardise allowed settings -->
|
<xsd:attribute name="sloleksId" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="sloleksKey" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="type"> <!-- types of headwords allowed; hmm, probably should standardise allowed settings -->
|
||||||
<xsd:simpleType>
|
<xsd:simpleType>
|
||||||
<xsd:restriction base="xsd:string">
|
<xsd:restriction base="xsd:string">
|
||||||
<xsd:enumeration value="single"/>
|
<xsd:enumeration value="single"/>
|
||||||
|
@ -121,13 +144,25 @@
|
||||||
<xsd:enumeration value="medmet"/>
|
<xsd:enumeration value="medmet"/>
|
||||||
<xsd:enumeration value="okrajšava"/>
|
<xsd:enumeration value="okrajšava"/>
|
||||||
<xsd:enumeration value="neuvrščeno"/>
|
<xsd:enumeration value="neuvrščeno"/>
|
||||||
|
<xsd:enumeration value="stalna zveza"/>
|
||||||
|
<xsd:enumeration value="frazem"/>
|
||||||
|
<xsd:enumeration value="povratni glagol"/>
|
||||||
</xsd:restriction>
|
</xsd:restriction>
|
||||||
</xsd:simpleType>
|
</xsd:simpleType>
|
||||||
|
|
||||||
|
<xsd:complexType name="subcategoryType">
|
||||||
|
<xsd:simpleContent>
|
||||||
|
<xsd:extension base="xsd:string">
|
||||||
|
<xsd:attribute name="type" type="xsd:string"/>
|
||||||
|
</xsd:extension>
|
||||||
|
</xsd:simpleContent>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
<!-- Grammatical information -->
|
<!-- Grammatical information -->
|
||||||
<xsd:complexType name="grammarType">
|
<xsd:complexType name="grammarType">
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="category" type="categoryType" minOccurs="0"/> <!-- identifies the part of speech, unless it's not a single lexeme -->
|
<xsd:element name="category" type="categoryType" minOccurs="0"/> <!-- identifies the part of speech, unless it's not a single lexeme -->
|
||||||
|
<xsd:element name="subcategory" type="subcategoryType" minOccurs="0" maxOccurs="unbounded"/> <!-- marks relevant subcategory information -->
|
||||||
<xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/> <!-- relevant grammatical features -->
|
<xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/> <!-- relevant grammatical features -->
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
@ -137,6 +172,7 @@
|
||||||
<xsd:simpleContent>
|
<xsd:simpleContent>
|
||||||
<xsd:extension base="xsd:decimal">
|
<xsd:extension base="xsd:decimal">
|
||||||
<xsd:attribute name="type" type="xsd:string" use="required"/> <!-- type of measure (e.g., frequency, logDice); hmm, probably should be enumerated -->
|
<xsd:attribute name="type" type="xsd:string" use="required"/> <!-- type of measure (e.g., frequency, logDice); hmm, probably should be enumerated -->
|
||||||
|
<xsd:attribute name="corpus_id" type="xsd:int"/>
|
||||||
<xsd:attribute name="source" type="xsd:string"/> <!-- context of the measure, probably corpus name and version -->
|
<xsd:attribute name="source" type="xsd:string"/> <!-- context of the measure, probably corpus name and version -->
|
||||||
</xsd:extension>
|
</xsd:extension>
|
||||||
</xsd:simpleContent>
|
</xsd:simpleContent>
|
||||||
|
@ -182,6 +218,27 @@
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- Related sense -->
|
||||||
|
<xsd:complexType name="relatedSenseType">
|
||||||
|
<xsd:simpleContent>
|
||||||
|
<xsd:extension base="xsd:string">
|
||||||
|
<xsd:attribute name="sense_id" type="xsd:int"/>
|
||||||
|
<xsd:attribute name="senseKey" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
|
||||||
|
<xsd:attribute name="type" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="synonymType" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="synonymScore" type="xsd:decimal"/>
|
||||||
|
</xsd:extension>
|
||||||
|
</xsd:simpleContent>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- List of related senses -->
|
||||||
|
<xsd:complexType name="relatedSenseListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="relatedSense" type="relatedSenseType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
<!-- Name-value pair for providing extra information -->
|
<!-- Name-value pair for providing extra information -->
|
||||||
<xsd:complexType name="labelType">
|
<xsd:complexType name="labelType">
|
||||||
<xsd:simpleContent>
|
<xsd:simpleContent>
|
||||||
|
@ -221,7 +278,7 @@
|
||||||
<xsd:complexType name="translationType">
|
<xsd:complexType name="translationType">
|
||||||
<xsd:simpleContent>
|
<xsd:simpleContent>
|
||||||
<xsd:extension base="xsd:string">
|
<xsd:extension base="xsd:string">
|
||||||
<xsd:attribute name="targetLang" type="xsd:string" use="required"/> <!-- target language -->
|
<xsd:attribute name="targetLang" type="xsd:string"/> <!-- target language -->
|
||||||
<xsd:attribute name="source" type="xsd:string"/> <!-- where the translation comes from -->
|
<xsd:attribute name="source" type="xsd:string"/> <!-- where the translation comes from -->
|
||||||
<xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
|
<xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
|
||||||
</xsd:extension>
|
</xsd:extension>
|
||||||
|
@ -252,8 +309,19 @@
|
||||||
</xsd:restriction>
|
</xsd:restriction>
|
||||||
</xsd:simpleType>
|
</xsd:simpleType>
|
||||||
</xsd:attribute>
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="lemma" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="msd" type="xsd:string"/> <!-- msd within the Slovene JOS system -->
|
||||||
<xsd:attribute name="sloleks" type="xsd:string"/> <!-- Sloleks ID -->
|
<xsd:attribute name="sloleks" type="xsd:string"/> <!-- Sloleks ID -->
|
||||||
<xsd:attribute name="kol" type="xsd:string"/> <!-- legacy attribute for the word's lemma -->
|
<xsd:attribute name="kol" type="xsd:string"/> <!-- legacy attribute for the word's lemma -->
|
||||||
|
<xsd:attribute name="lexical_unit_lexeme_id" type="xsd:int"/> <!-- ID within its lexical unit -->
|
||||||
|
<xsd:attribute name="lexeme_id" type="xsd:int"/> <!-- ID of the component's lexeme -->
|
||||||
|
<xsd:attribute name="space"> <!-- is the component followed by a space? only used if it isn't -->
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="false"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
</xsd:extension>
|
</xsd:extension>
|
||||||
</xsd:simpleContent>
|
</xsd:simpleContent>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
@ -261,7 +329,7 @@
|
||||||
<!-- Mixed-content container element used in examples where roles can include multiple word components (e.g., semantic roles). -->
|
<!-- Mixed-content container element used in examples where roles can include multiple word components (e.g., semantic roles). -->
|
||||||
<xsd:complexType name="exampleTreeType" mixed="true">
|
<xsd:complexType name="exampleTreeType" mixed="true">
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="comp" type="compType" maxOccurs="unbounded"/> <!-- the tree contains one or more components -->
|
<xsd:element name="comp" type="compType" minOccurs="0" maxOccurs="unbounded"/> <!-- the tree contains zero or more components -->
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="role" type="semanticRoleType"/>
|
<xsd:attribute name="role" type="semanticRoleType"/>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
@ -275,6 +343,7 @@
|
||||||
</xsd:choice>
|
</xsd:choice>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="corpus_id" type="xsd:int"/>
|
<xsd:attribute name="corpus_id" type="xsd:int"/>
|
||||||
|
<xsd:attribute name="corpusName" type="xsd:string"/>
|
||||||
<xsd:attribute name="example_id" type="xsd:int"/>
|
<xsd:attribute name="example_id" type="xsd:int"/>
|
||||||
<xsd:attribute name="exampleId" type="xsd:string"/> <!-- the example's ID within the corpus itself -->
|
<xsd:attribute name="exampleId" type="xsd:string"/> <!-- the example's ID within the corpus itself -->
|
||||||
<xsd:attribute name="modified" type="xsd:boolean"/> <!-- is the example text different than in the corpus? -->
|
<xsd:attribute name="modified" type="xsd:boolean"/> <!-- is the example text different than in the corpus? -->
|
||||||
|
@ -298,6 +367,7 @@
|
||||||
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
|
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
|
||||||
<xsd:attribute name="structure_id" type="xsd:int"/>
|
<xsd:attribute name="structure_id" type="xsd:int"/>
|
||||||
<xsd:attribute name="structureName" type="xsd:string"/> <!-- a string name for the structure, especially if we don't have its ID -->
|
<xsd:attribute name="structureName" type="xsd:string"/> <!-- a string name for the structure, especially if we don't have its ID -->
|
||||||
|
<xsd:attribute name="status" type="xsd:string"/>
|
||||||
<xsd:attribute name="frequency" type="xsd:int"/>
|
<xsd:attribute name="frequency" type="xsd:int"/>
|
||||||
<xsd:attribute name="logDice" type="xsd:decimal"/>
|
<xsd:attribute name="logDice" type="xsd:decimal"/>
|
||||||
<xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
|
<xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
|
||||||
|
@ -308,8 +378,13 @@
|
||||||
<xsd:complexType name="translationContainerType">
|
<xsd:complexType name="translationContainerType">
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
||||||
|
<xsd:choice>
|
||||||
|
<xsd:sequence>
|
||||||
<xsd:element name="translation" type="translationType"/>
|
<xsd:element name="translation" type="translationType"/>
|
||||||
<xsd:element name="explanation" type="xsd:string" minOccurs="0"/> <!-- hmm: what's expected here? -->
|
<xsd:element name="explanationList" type="explanationListType" minOccurs="0"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:element name="explanationList" type="explanationListType"/>
|
||||||
|
</xsd:choice>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="cluster" type="xsd:int"/> <!-- number identifying a group of clustered translations -->
|
<xsd:attribute name="cluster" type="xsd:int"/> <!-- number identifying a group of clustered translations -->
|
||||||
<xsd:attribute name="source" type="xsd:string"/> <!-- hmm, we already have @source under translationType, that's not enough? -->
|
<xsd:attribute name="source" type="xsd:string"/> <!-- hmm, we already have @source under translationType, that's not enough? -->
|
||||||
|
@ -322,6 +397,22 @@
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- Explanation (in contrast with translation) -->
|
||||||
|
<xsd:complexType name="explanationType">
|
||||||
|
<xsd:simpleContent>
|
||||||
|
<xsd:extension base="xsd:string">
|
||||||
|
<xsd:attribute name="language" type="xsd:string"/>
|
||||||
|
</xsd:extension>
|
||||||
|
</xsd:simpleContent>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- List of explanations -->
|
||||||
|
<xsd:complexType name="explanationListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="explanation" type="explanationType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
<!-- Wrapper including an example and (possibly) its translation -->
|
<!-- Wrapper including an example and (possibly) its translation -->
|
||||||
<xsd:complexType name="exampleContainerType">
|
<xsd:complexType name="exampleContainerType">
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
|
@ -329,7 +420,7 @@
|
||||||
<xsd:element name="corpusExample" type="corpusExampleType"/>
|
<xsd:element name="corpusExample" type="corpusExampleType"/>
|
||||||
<xsd:element name="multiwordExample" type="multipleLexemeExampleType"/>
|
<xsd:element name="multiwordExample" type="multipleLexemeExampleType"/>
|
||||||
</xsd:choice>
|
</xsd:choice>
|
||||||
<xsd:element name="translationContainer" type="translationContainerType" minOccurs="0"/>
|
<xsd:element name="translationContainerList" type="translationContainerListType" minOccurs="0"/>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
|
@ -420,7 +511,7 @@
|
||||||
<xsd:complexType name="semanticRoleContainerType">
|
<xsd:complexType name="semanticRoleContainerType">
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="semanticRole" type="semanticRoleType"/>
|
<xsd:element name="semanticRole" type="semanticRoleType"/>
|
||||||
<xsd:element name="syntacticStructureList" type="syntacticStructureListType"/>
|
<xsd:element name="syntacticStructureList" type="syntacticStructureListType" minOccurs="0" maxOccurs="1"/>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="cluster" type="xsd:int"/> <!-- hmm, number identifying a group of clustered roles? -->
|
<xsd:attribute name="cluster" type="xsd:int"/> <!-- hmm, number identifying a group of clustered roles? -->
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
@ -453,6 +544,141 @@
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="valencyPattern" type="valencyPatternType" minOccurs="0" maxOccurs="unbounded"/>
|
<xsd:element name="valencyPattern" type="valencyPatternType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="system"> <!-- the system within which the structure is defined -->
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="JOS"/>
|
||||||
|
<xsd:enumeration value="UD"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- MSD code, in a particular language -->
|
||||||
|
<xsd:complexType name="msdType">
|
||||||
|
<xsd:simpleContent>
|
||||||
|
<xsd:extension base="xsd:string">
|
||||||
|
<xsd:attribute name="language" type="xsd:string" use="required"/>
|
||||||
|
<xsd:attribute name="system" type="xsd:string" use="required"/>
|
||||||
|
</xsd:extension>
|
||||||
|
</xsd:simpleContent>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- Container for all kinds of form representation lists -->
|
||||||
|
<xsd:complexType name="formRepresentationsType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="orthographyList" type="orthographyListType"/>
|
||||||
|
<xsd:element name="accentuationList" type="accentuationListType" minOccurs="0" maxOccurs="2"/>
|
||||||
|
<xsd:element name="pronunciationList" type="pronunciationListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="hyphenationList" type="hyphenationListType" minOccurs="0"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="orthographyType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="form" type="formType"/>
|
||||||
|
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="id" type="xsd:integer"/>
|
||||||
|
<xsd:attribute name="morphologyPatterns" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="status" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="norm" type="xsd:string"/> <!-- closed set? -->
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="orthographyListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="orthography" type="orthographyType" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="accentuationType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="form" type="formType" minOccurs="0"/>
|
||||||
|
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="id" type="xsd:integer"/>
|
||||||
|
<xsd:attribute name="orthography_id" type="xsd:integer"/>
|
||||||
|
<xsd:attribute name="accentuationPatterns" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="status" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="norm" type="xsd:string"/>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="accentuationListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="accentuation" type="accentuationType" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="type">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="dynamic"/>
|
||||||
|
<xsd:enumeration value="tonemic"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:simpleType name="formType">
|
||||||
|
<xsd:restriction base="xsd:string"/>
|
||||||
|
</xsd:simpleType>
|
||||||
|
|
||||||
|
<xsd:complexType name="pronunciationFormType">
|
||||||
|
<xsd:simpleContent>
|
||||||
|
<xsd:extension base="xsd:string">
|
||||||
|
<xsd:attribute name="script" type="xsd:string" use="required"/>
|
||||||
|
</xsd:extension>
|
||||||
|
</xsd:simpleContent>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="pronunciationType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="form" type="pronunciationFormType" maxOccurs="unbounded"/>
|
||||||
|
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="id" type="xsd:integer"/>
|
||||||
|
<xsd:attribute name="dynamic_accentuation_id" type="xsd:integer"/>
|
||||||
|
<xsd:attribute name="tonemic_accentuation_id" type="xsd:integer"/>
|
||||||
|
<xsd:attribute name="status" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="norm" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="type" type="xsd:string"/>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="pronunciationListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="pronunciation" type="pronunciationType" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="hyphenationType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="form" type="formType"/>
|
||||||
|
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="id" type="xsd:integer"/>
|
||||||
|
<xsd:attribute name="orthography_id" type="xsd:integer"/>
|
||||||
|
<xsd:attribute name="status" type="xsd:string"/>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="hyphenationListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="hyphenation" type="hyphenationType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- Word form wrapper -->
|
||||||
|
<xsd:complexType name="wordFormType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="msd" type="msdType" maxOccurs="unbounded"/>
|
||||||
|
<xsd:element name="grammarFeatureList" type="grammarFeatureListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="formRepresentations" type="formRepresentationsType"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="id" type="xsd:int"/>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!-- List of word forms -->
|
||||||
|
<xsd:complexType name="wordFormListType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="wordForm" type="wordFormType" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
</xsd:schema>
|
</xsd:schema>
|
||||||
|
|
|
@ -35,11 +35,14 @@
|
||||||
<xsd:element name="sense" minOccurs="0" maxOccurs="unbounded">
|
<xsd:element name="sense" minOccurs="0" maxOccurs="unbounded">
|
||||||
<xsd:complexType>
|
<xsd:complexType>
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
|
<xsd:element name="relatedSenseList" type="relatedSenseListType" minOccurs="0"/>
|
||||||
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
||||||
<xsd:element name="definitionList" type="definitionListType" minOccurs="0"/>
|
<xsd:element name="definitionList" type="definitionListType" minOccurs="0"/>
|
||||||
<xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0"/>
|
<xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0"/>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="id" type="xsd:int"/>
|
<xsd:attribute name="id" type="xsd:int"/>
|
||||||
|
<xsd:attribute name="key" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="dummy" type="xsd:boolean"/>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
</xsd:element>
|
</xsd:element>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
schemaLocation="http://www.w3.org/2001/xml.xsd" />
|
schemaLocation="http://www.w3.org/2001/xml.xsd" />
|
||||||
<xsd:include schemaLocation="inventory.xsd"/>
|
<xsd:include schemaLocation="inventory.xsd"/>
|
||||||
|
|
||||||
<xsd:element name="dictionary">
|
<xsd:element name="lexicon">
|
||||||
<xsd:complexType>
|
<xsd:complexType>
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="entry" minOccurs="0" maxOccurs="unbounded">
|
<xsd:element name="entry" minOccurs="0" maxOccurs="unbounded">
|
||||||
|
@ -16,29 +16,19 @@
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="status" type="statusType" minOccurs="0"/>
|
<xsd:element name="status" type="statusType" minOccurs="0"/>
|
||||||
<xsd:element name="headword" type="headwordType"/>
|
<xsd:element name="headword" type="headwordType"/>
|
||||||
<xsd:element name="lexicalUnit" type="lexicalUnitType" minOccurs="0"/>
|
<xsd:element name="lexicalUnit" type="lexicalUnitType"/>
|
||||||
<xsd:element name="grammar" type="grammarType" minOccurs="0"/>
|
<xsd:element name="grammar" type="grammarType"/>
|
||||||
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
|
<xsd:element name="measureList" type="measureListType"/>
|
||||||
|
<xsd:element name="relatedEntryList" type="relatedEntryListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
|
||||||
|
<xsd:element name="comment" type="commentType" minOccurs="0"/>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
</xsd:element>
|
</xsd:element>
|
||||||
<xsd:element name="body">
|
<xsd:element name="body">
|
||||||
<xsd:complexType>
|
<xsd:complexType>
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="senseList">
|
<xsd:element name="wordFormList" type="wordFormListType"/>
|
||||||
<xsd:complexType>
|
|
||||||
<xsd:sequence>
|
|
||||||
<xsd:element name="sense" minOccurs="0" maxOccurs="unbounded">
|
|
||||||
<xsd:complexType>
|
|
||||||
<xsd:sequence>
|
|
||||||
<xsd:element name="syntacticStructureList" type="syntacticStructureListType"/>
|
|
||||||
</xsd:sequence>
|
|
||||||
<xsd:attribute name="id" type="xsd:int"/>
|
|
||||||
</xsd:complexType>
|
|
||||||
</xsd:element>
|
|
||||||
</xsd:sequence>
|
|
||||||
</xsd:complexType>
|
|
||||||
</xsd:element>
|
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
</xsd:element>
|
</xsd:element>
|
32
resources/schema/semantic_types.xsd
Normal file
32
resources/schema/semantic_types.xsd
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
|
||||||
|
|
||||||
|
<xsd:import namespace="http://www.w3.org/XML/1998/namespace"
|
||||||
|
schemaLocation="http://www.w3.org/2001/xml.xsd" />
|
||||||
|
|
||||||
|
<xsd:complexType name="nameType">
|
||||||
|
<xsd:simpleContent>
|
||||||
|
<xsd:extension base="xsd:string">
|
||||||
|
<xsd:attribute name="sl" type="xsd:string"/>
|
||||||
|
</xsd:extension>
|
||||||
|
</xsd:simpleContent>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="semanticTypeType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="name" type="nameType"/>
|
||||||
|
<xsd:element name="semanticType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="id" type="xsd:int"/>
|
||||||
|
<xsd:attribute name="SEMCODE" type="xsd:string"/>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:element name="semanticTypes">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="semanticType" type="semanticTypeType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
|
||||||
|
</xsd:schema>
|
350
resources/schema/structures.xsd
Normal file
350
resources/schema/structures.xsd
Normal file
|
@ -0,0 +1,350 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
|
||||||
|
|
||||||
|
<xsd:import namespace="http://www.w3.org/XML/1998/namespace"
|
||||||
|
schemaLocation="http://www.w3.org/2001/xml.xsd" />
|
||||||
|
|
||||||
|
<xsd:complexType name="restrictionType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="feature" type="restrictionFeatureType" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="type">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="morphology"/>
|
||||||
|
<xsd:enumeration value="lexis"/>
|
||||||
|
<xsd:enumeration value="space"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="restrictionOrType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="restriction" type="restrictionType" minOccurs="2" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="representationType">
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="feature" type="representationFeatureType" maxOccurs="unbounded"/>
|
||||||
|
</xsd:sequence>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<xsd:complexType name="restrictionFeatureType">
|
||||||
|
<xsd:attribute name="POS">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="noun"/>
|
||||||
|
<xsd:enumeration value="verb"/>
|
||||||
|
<xsd:enumeration value="adjective"/>
|
||||||
|
<xsd:enumeration value="adverb"/>
|
||||||
|
<xsd:enumeration value="pronoun"/>
|
||||||
|
<xsd:enumeration value="numeral"/>
|
||||||
|
<xsd:enumeration value="preposition"/>
|
||||||
|
<xsd:enumeration value="conjunction"/>
|
||||||
|
<xsd:enumeration value="particle"/>
|
||||||
|
<xsd:enumeration value="interjection"/>
|
||||||
|
<xsd:enumeration value="abbreviation"/>
|
||||||
|
<xsd:enumeration value="residual"/>
|
||||||
|
<xsd:enumeration value="punctuation"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="case">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="nominative"/>
|
||||||
|
<xsd:enumeration value="genitive"/>
|
||||||
|
<xsd:enumeration value="dative"/>
|
||||||
|
<xsd:enumeration value="accusative"/>
|
||||||
|
<xsd:enumeration value="locative"/>
|
||||||
|
<xsd:enumeration value="instrumental"/>
|
||||||
|
<xsd:enumeration value="genitive|accusative"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="type">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="main"/>
|
||||||
|
<xsd:enumeration value="auxiliary"/>
|
||||||
|
<xsd:enumeration value="reflexive"/>
|
||||||
|
<xsd:enumeration value="common"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="form">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="digit"/>
|
||||||
|
<xsd:enumeration value="roman"/>
|
||||||
|
<xsd:enumeration value="letter"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="clitic">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="yes"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="negative">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="yes"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="filter">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="negative"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="vform">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="present"/>
|
||||||
|
<xsd:enumeration value="infinitive"/>
|
||||||
|
<xsd:enumeration value="supine"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="lemma">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="in|ali"/>
|
||||||
|
<xsd:enumeration value="kot|kakor|ko"/>
|
||||||
|
<xsd:enumeration value="ne"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="level">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="lemma"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
<xsd:attribute name="contact">
|
||||||
|
<xsd:simpleType>
|
||||||
|
<xsd:restriction base="xsd:string">
|
||||||
|
<xsd:enumeration value="neither"/>
|
||||||
|
<xsd:enumeration value="left"/>
|
||||||
|
<xsd:enumeration value="right"/>
|
||||||
|
<xsd:enumeration value="both"/>
|
||||||
|
</xsd:restriction>
|
||||||
|
</xsd:simpleType>
|
||||||
|
</xsd:attribute>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
|
<!--
  representationFeatureType: a complex type that declares attributes only
  (no child elements). None of the attributes is marked use="required".
  Apart from head_cid, which is an integer, every attribute is a string
  restricted to a closed list of values.
-->
<xsd:complexType name="representationFeatureType">

  <!-- rendition: word_form, lemma or lexis -->
  <xsd:attribute name="rendition">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="word_form"/>
        <xsd:enumeration value="lemma"/>
        <xsd:enumeration value="lexis"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- selection: msd, agreement or all -->
  <xsd:attribute name="selection">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="msd"/>
        <xsd:enumeration value="agreement"/>
        <xsd:enumeration value="all"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- msd: either "number" or the combined value "gender+number+case" -->
  <xsd:attribute name="msd">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="number"/>
        <xsd:enumeration value="gender+number+case"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- head_cid: integer (presumably the cid of another component; confirm against the data) -->
  <xsd:attribute name="head_cid" type="xsd:integer"/>

  <!-- case: only "nominative" is accepted -->
  <xsd:attribute name="case">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="nominative"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- vform: only "present" is accepted -->
  <xsd:attribute name="vform">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="present"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- person: only "third" is accepted -->
  <xsd:attribute name="person">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="third"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- string: only the literal "ne" is accepted -->
  <xsd:attribute name="string">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="ne"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- format: only "lowercase" is accepted -->
  <xsd:attribute name="format">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="lowercase"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

</xsd:complexType>
|
||||||
|
|
||||||
|
<!--
  componentsComponentType: attribute-only type used for the <component>
  children of <components>. cid and type are required; label and status
  are optional.
-->
<xsd:complexType name="componentsComponentType">

  <!-- cid: required integer identifier of the component -->
  <xsd:attribute name="cid" use="required" type="xsd:integer"/>

  <!-- type: required; "core" or "other" -->
  <xsd:attribute name="type" use="required">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="core"/>
        <xsd:enumeration value="other"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- label: free-form string -->
  <xsd:attribute name="label" type="xsd:string"/>

  <!-- status: "forbidden" or "obligatory" -->
  <xsd:attribute name="status">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="forbidden"/>
        <xsd:enumeration value="obligatory"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

</xsd:complexType>
|
||||||
|
|
||||||
|
<!--
  dependencyType: attribute-only type used for <dependency> elements.
  from, to and label are required; order is optional.
-->
<xsd:complexType name="dependencyType">

  <!-- from: required; either an integer or the literal "#" (union of the two) -->
  <xsd:attribute name="from" use="required">
    <xsd:simpleType>
      <xsd:union>
        <xsd:simpleType>
          <xsd:restriction base="xsd:integer"/>
        </xsd:simpleType>
        <xsd:simpleType>
          <xsd:restriction base="xsd:string">
            <xsd:enumeration value="#"/>
          </xsd:restriction>
        </xsd:simpleType>
      </xsd:union>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- to: required integer; unlike "from", "#" is not accepted here -->
  <xsd:attribute name="to" use="required" type="xsd:integer"/>

  <!--
    label: required; one value from the closed list below. The combined
    entries ("tri|štiri" and similar) are literal strings containing "|",
    not alternations evaluated by the schema.
  -->
  <xsd:attribute name="label" use="required">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="#"/>
        <xsd:enumeration value="modra"/>
        <xsd:enumeration value="del"/>
        <xsd:enumeration value="dol"/>
        <xsd:enumeration value="ena"/>
        <xsd:enumeration value="dve"/>
        <xsd:enumeration value="tri"/>
        <xsd:enumeration value="štiri"/>
        <xsd:enumeration value="prir"/>
        <xsd:enumeration value="vez"/>
        <xsd:enumeration value="skup"/>
        <xsd:enumeration value="tri|štiri"/>
        <xsd:enumeration value="dve|tri|štiri"/>
        <xsd:enumeration value="dol|dve|tri|štiri"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

  <!-- order: when present, must be "to-from" -->
  <xsd:attribute name="order">
    <xsd:simpleType>
      <xsd:restriction base="xsd:string">
        <xsd:enumeration value="to-from"/>
      </xsd:restriction>
    </xsd:simpleType>
  </xsd:attribute>

</xsd:complexType>
|
||||||
|
|
||||||
|
<!--
  syntactic_structures: global element holding zero or more
  <syntactic_structure> children and a required system_type attribute.
-->
<xsd:element name="syntactic_structures">
  <xsd:complexType>
    <xsd:sequence>

      <!-- Each structure contains, in this order: components, dependencies, definition. -->
      <xsd:element name="syntactic_structure" minOccurs="0" maxOccurs="unbounded">
        <xsd:complexType>
          <xsd:sequence>

            <!-- components: list of <component> items plus a required order attribute -->
            <xsd:element name="components">
              <xsd:complexType>
                <xsd:sequence>
                  <xsd:element name="component" type="componentsComponentType" minOccurs="0" maxOccurs="unbounded"/>
                </xsd:sequence>
                <!-- order: "fixed" or "variable" -->
                <xsd:attribute name="order" use="required">
                  <xsd:simpleType>
                    <xsd:restriction base="xsd:string">
                      <xsd:enumeration value="fixed"/>
                      <xsd:enumeration value="variable"/>
                    </xsd:restriction>
                  </xsd:simpleType>
                </xsd:attribute>
              </xsd:complexType>
            </xsd:element>

            <!-- dependencies: list of <dependency> items, no attributes -->
            <xsd:element name="dependencies">
              <xsd:complexType>
                <xsd:sequence>
                  <xsd:element name="dependency" type="dependencyType" minOccurs="0" maxOccurs="unbounded"/>
                </xsd:sequence>
              </xsd:complexType>
            </xsd:element>

            <!--
              definition: list of <component> items. These use a local
              anonymous type, not componentsComponentType.
            -->
            <xsd:element name="definition">
              <xsd:complexType>
                <xsd:sequence>
                  <xsd:element name="component" minOccurs="0" maxOccurs="unbounded">
                    <xsd:complexType>
                      <xsd:sequence>
                        <!--
                          Optional choice: one or more <restriction> elements,
                          or one or more <restriction_or> elements; the two
                          kinds cannot be mixed.
                        -->
                        <xsd:choice minOccurs="0">
                          <xsd:element name="restriction" type="restrictionType" maxOccurs="unbounded"/>
                          <xsd:element name="restriction_or" type="restrictionOrType" maxOccurs="unbounded"/>
                        </xsd:choice>
                        <!-- Any number of <representation> elements follow the restrictions. -->
                        <xsd:element name="representation" type="representationType" minOccurs="0" maxOccurs="unbounded"/>
                      </xsd:sequence>
                      <xsd:attribute name="cid" type="xsd:integer"/>
                    </xsd:complexType>
                  </xsd:element>
                </xsd:sequence>
              </xsd:complexType>
            </xsd:element>

          </xsd:sequence>

          <!-- Attributes of <syntactic_structure>; only type is required. -->
          <xsd:attribute name="id" type="xsd:integer"/>
          <xsd:attribute name="parent_id" type="xsd:integer"/>
          <xsd:attribute name="tempId" type="xsd:integer"/>
          <xsd:attribute name="label" type="xsd:string"/>
          <xsd:attribute name="formal_descendants" type="xsd:string"/>
          <!-- type: "single", "collocation", "formal" or "other" -->
          <xsd:attribute name="type" use="required">
            <xsd:simpleType>
              <xsd:restriction base="xsd:string">
                <xsd:enumeration value="single"/>
                <xsd:enumeration value="collocation"/>
                <xsd:enumeration value="formal"/>
                <xsd:enumeration value="other"/>
              </xsd:restriction>
            </xsd:simpleType>
          </xsd:attribute>
        </xsd:complexType>
      </xsd:element>

    </xsd:sequence>

    <!-- system_type: required; "JOS" is the only accepted value -->
    <xsd:attribute name="system_type" use="required">
      <xsd:simpleType>
        <xsd:restriction base="xsd:string">
          <xsd:enumeration value="JOS"/>
        </xsd:restriction>
      </xsd:simpleType>
    </xsd:attribute>
  </xsd:complexType>
</xsd:element>
|
||||||
|
|
||||||
|
</xsd:schema>
|
|
@ -6,10 +6,11 @@ validated both with this schema and with an appropriate more specific schema bas
|
||||||
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/xml.xsd" />
|
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/xml.xsd" />
|
||||||
|
|
||||||
<!-- top-level element is a dictionary -->
|
<!-- top-level element is a dictionary -->
|
||||||
<xsd:element name="dictionary" type="dictionaryType"/>
|
<xsd:element name="dictionary" type="rootType"/>
|
||||||
|
<xsd:element name="lexicon" type="rootType"/>
|
||||||
|
|
||||||
<!-- top-level type consists of sequence of entries -->
|
<!-- top-level type consists of sequence of entries -->
|
||||||
<xsd:complexType name="dictionaryType">
|
<xsd:complexType name="rootType">
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="entry" type="entryType" minOccurs="0" maxOccurs="unbounded"/>
|
<xsd:element name="entry" type="entryType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
|
|
27
scripts/delexonomise.py
Normal file
27
scripts/delexonomise.py
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
#!/usr/bin/python3
"""Rewrite an XML file as a plain <dictionary> document without lxnm markup.

Usage: delexonomise.py INPUT_XML OUTPUT_XML

The root element of INPUT_XML is renamed to ``dictionary``. The document is
then pretty-printed, and the ``xmlns:lxnm`` namespace declaration and every
``lxnm:``-prefixed attribute are removed from the serialised text before it
is written to OUTPUT_XML as UTF-8.
"""

import io
import re
import sys

# Serialised lxnm namespace declaration, including the space that precedes it.
LXNM_NAMESPACE_PATTERN = re.compile(' xmlns:lxnm=".*?"')
# Serialised lxnm-prefixed attribute, including the space that precedes it.
LXNM_ATTRIBUTE_PATTERN = re.compile(' lxnm:.+?=".*?"')


def strip_lxnm_markup(text):
    """Return ``text`` without lxnm namespace declarations and attributes.

    Args:
        text: Serialised XML (a single line or a whole document).

    Returns:
        The same text with every ' xmlns:lxnm="..."' and ' lxnm:NAME="..."'
        occurrence removed. Neither pattern matches across a newline, so
        processing a whole document gives the same result as processing it
        line by line.
    """
    text = LXNM_NAMESPACE_PATTERN.sub('', text)
    return LXNM_ATTRIBUTE_PATTERN.sub('', text)


def main(arguments):
    """Run the conversion for ``arguments`` (same layout as ``sys.argv``).

    Args:
        arguments: Program name, input file name, output file name.
    """
    if len(arguments) < 3:
        sys.exit('usage: {} INPUT_XML OUTPUT_XML'.format(arguments[0]))
    input_file_name = arguments[1]
    output_file_name = arguments[2]

    # Imported here so that strip_lxnm_markup() stays usable without lxml.
    import lxml.etree as lxml

    tree = lxml.parse(input_file_name)
    tree.getroot().tag = 'dictionary'

    # Serialise into memory rather than a fixed /tmp/temp.xml: a shared scratch
    # path collides between concurrent runs and is left behind on errors.
    serialised = io.BytesIO()
    tree.write(serialised, encoding='UTF-8', pretty_print=True)
    text = serialised.getvalue().decode('utf-8')

    # The encoding is explicit so the output does not depend on the locale.
    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        output_file.write(strip_lxnm_markup(text))


if __name__ == '__main__':
    main(sys.argv)
|
20
scripts/get_sample.py
Normal file
20
scripts/get_sample.py
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
#!/usr/bin/python3
"""Write a random sample of the <entry> children of an XML document.

Usage: get_sample.py INPUT_XML SAMPLE_SIZE OUTPUT_XML

All direct <entry> children of the root except SAMPLE_SIZE randomly chosen
ones are removed, and the remaining document is written to OUTPUT_XML as
UTF-8. If the document has no more than SAMPLE_SIZE entries, all are kept.
"""

import random
import sys


def choose_sample_indexes(entry_count, sample_size):
    """Pick which entry positions to keep.

    Args:
        entry_count: Number of entries available.
        sample_size: Number of entries to keep; must not be negative.

    Returns:
        A set of ``min(entry_count, sample_size)`` distinct indexes drawn at
        random from ``range(entry_count)``.

    Raises:
        ValueError: If ``sample_size`` is negative.
    """
    if sample_size < 0:
        # A negative slice bound would silently keep an unintended subset.
        raise ValueError('sample size must not be negative')
    indexes = list(range(entry_count))
    random.shuffle(indexes)
    # A set makes the per-entry membership test constant-time.
    return set(indexes[:sample_size])


def main(arguments):
    """Run the sampling for ``arguments`` (same layout as ``sys.argv``).

    Args:
        arguments: Program name, input file name, sample size, output file
            name.
    """
    if len(arguments) < 4:
        sys.exit('usage: {} INPUT_XML SAMPLE_SIZE OUTPUT_XML'.format(arguments[0]))
    input_file_name = arguments[1]
    sample_size = int(arguments[2])
    output_file_name = arguments[3]

    # Imported here so that choose_sample_indexes() stays usable without lxml.
    import lxml.etree as lxml

    tree = lxml.parse(input_file_name)
    root = tree.getroot()

    # Evaluate the XPath once; the resulting list is independent of the tree,
    # so removing entries while looping over it is safe.
    entries = root.xpath('entry')
    kept_indexes = choose_sample_indexes(len(entries), sample_size)
    for index, entry in enumerate(entries):
        if index not in kept_indexes:
            root.remove(entry)

    tree.write(output_file_name, encoding='UTF-8')


if __name__ == '__main__':
    main(sys.argv)
|
|
@ -4,7 +4,8 @@ script_dir="$(dirname "$(readlink -f "$0")")"
|
||||||
cd $script_dir
|
cd $script_dir
|
||||||
|
|
||||||
schema_dir=../resources/schema
|
schema_dir=../resources/schema
|
||||||
top_schema=$schema_dir/top_dictionaries.xsd # for now we only have dictionaries
|
top_schema=$schema_dir/top_dictionaries.xsd
|
||||||
|
dictionary_types=("bilingual_dictionaries collocation_dictionary monolingual_dictionaries morphological_lexicon valency_lexicon")
|
||||||
|
|
||||||
example_dir=../examples
|
example_dir=../examples
|
||||||
for example_subdir in $example_dir/*
|
for example_subdir in $example_dir/*
|
||||||
|
@ -13,11 +14,13 @@ do
|
||||||
bottom_schema=$schema_dir/$base.xsd
|
bottom_schema=$schema_dir/$base.xsd
|
||||||
for example_file in $example_subdir/*.xml
|
for example_file in $example_subdir/*.xml
|
||||||
do
|
do
|
||||||
|
case $base in bilingual_dictionaries|collocation_dictionary|monolingual_dictionaries|morphological_lexicon|valency_lexicon)
|
||||||
command="xmllint -schema $top_schema $example_file --noout"
|
command="xmllint -schema $top_schema $example_file --noout"
|
||||||
echo $command
|
echo $command
|
||||||
if ! $command; then
|
if ! $command; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
esac
|
||||||
command="xmllint -schema $bottom_schema $example_file --noout"
|
command="xmllint -schema $bottom_schema $example_file --noout"
|
||||||
echo $command
|
echo $command
|
||||||
if ! $command; then
|
if ! $command; then
|
||||||
|
|
Loading…
Reference in New Issue
Block a user