Redmine #1224: made sloleks-related adjustments to inventory and added related examples
This commit is contained in:
parent
13791bdabe
commit
76c406dffc
86
examples/morphological_lexicon/Amanda.xml
Normal file
86
examples/morphological_lexicon/Amanda.xml
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
<lexicon>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma orthography_ids="342" dynamic_accentuation_ids="81" pronunciation_ids="32|33" hyphenation_ids="7" tonemic_accentuation_ids="85">Amanda</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_Amanda" type="single">
|
||||||
|
<lexeme lexical_unit_lexeme_id="83224">Amanda</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>samostalnik</category>
|
||||||
|
<subcategory type="pronunciation">ambiguous</subcategory>
|
||||||
|
<grammarFeature name="type">proper</grammarFeature>
|
||||||
|
<grammarFeature name="gender">feminine</grammarFeature>
|
||||||
|
</grammar>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">285818</measure>
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<wordFormList>
|
||||||
|
<wordForm id="333330">
|
||||||
|
<msd language="sl" system="JOS">Slzei</msd>
|
||||||
|
<msd language="en" system="UD">PROPN|...</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="number">singular</grammarFeature>
|
||||||
|
<grammarFeature name="case">nominative</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentations>
|
||||||
|
<orthographyList>
|
||||||
|
<orthography id="342" morphologyPatterns="Sz.1.2.0">
|
||||||
|
<form>Amanda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">813</measure>
|
||||||
|
<measure type="frequency" source="GOS 1.0">432</measure>
|
||||||
|
</measureList>
|
||||||
|
</orthography>
|
||||||
|
</orthographyList>
|
||||||
|
<accentuationList type="dynamic">
|
||||||
|
<accentuation id="81" orthography_id="342" accentuationPatterns="cjvt:AP.1|zrc:S202b">
|
||||||
|
<form>Amánda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">77</measure>
|
||||||
|
</measureList>
|
||||||
|
</accentuation>
|
||||||
|
<accentuation id="82" orthography_id="342" accentuationPatterns="cjvt:no_accentuation|zrc:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<accentuationList type="tonemic">
|
||||||
|
<accentuation id="85" orthography_id="342" accentuationPatterns="zrc:S202b|zrc:S202i">
|
||||||
|
<form>Amànda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</accentuation>
|
||||||
|
</accentuationList>
|
||||||
|
<pronunciationList>
|
||||||
|
<pronunciation id="32" dynamic_accentuation_id="81" type="slovene">
|
||||||
|
<form script="IPA">aˈmaːnda</form>
|
||||||
|
<form script="SAMPA">a"ma:nda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
<pronunciation id="33" dynamic_accentuation_id="82" type="english">
|
||||||
|
<form script="IPA">əˈmɛːnda</form>
|
||||||
|
<form script="SAMPA">@"mE:nda</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
</pronunciationList>
|
||||||
|
<hyphenationList>
|
||||||
|
<hyphenation id="7" orthography_id="342">
|
||||||
|
<form>A|man|da</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</hyphenation>
|
||||||
|
</hyphenationList>
|
||||||
|
</formRepresentations>
|
||||||
|
</wordForm>
|
||||||
|
</wordFormList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</lexicon>
|
75
examples/morphological_lexicon/PCR.xml
Normal file
75
examples/morphological_lexicon/PCR.xml
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
<lexicon>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma orthography_ids="342" dynamic_accentuation_ids="81" pronunciation_ids="32|33|34" hyphenation_ids="" tonemic_accentuation_ids="85">PCR</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_PCR" type="single">
|
||||||
|
<lexeme lexical_unit_lexeme_id="83224">PCR</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>samostalnik</category>
|
||||||
|
<subcategory type="pronunciation">acronym</subcategory>
|
||||||
|
<grammarFeature name="type">common</grammarFeature>
|
||||||
|
<grammarFeature name="gender">masculine</grammarFeature>
|
||||||
|
</grammar>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">285818</measure>
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<wordFormList>
|
||||||
|
<wordForm id="333330">
|
||||||
|
<msd language="sl" system="JOS">Somei</msd>
|
||||||
|
<msd language="en" system="UD">NOUN|...</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="number">singular</grammarFeature>
|
||||||
|
<grammarFeature name="case">nominative</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentations>
|
||||||
|
<orthographyList>
|
||||||
|
<orthography id="342" morphologyPatterns="Sm.5.1.o">
|
||||||
|
<form>PCR</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">813</measure>
|
||||||
|
<measure type="frequency" source="GOS 1.0">432</measure>
|
||||||
|
</measureList>
|
||||||
|
</orthography>
|
||||||
|
</orthographyList>
|
||||||
|
<accentuationList type="dynamic">
|
||||||
|
<accentuation id="81" orthography_id="342" accentuationPatterns="cjvt:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<accentuationList type="tonemic">
|
||||||
|
<accentuation id="85" orthography_id="342" accentuationPatterns="zrc:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<pronunciationList>
|
||||||
|
<pronunciation id="32" dynamic_accentuation_id="81" type="slovene_spelling_short" status="preferred">
|
||||||
|
<form script="IPA">ˈpəˈtsəˈrə</form>
|
||||||
|
<form script="SAMPA">"p@"ts@"r@</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
<pronunciation id="33" dynamic_accentuation_id="81" type="slovene_spelling_long">
|
||||||
|
<form script="IPA">ˈpeːˈtseːˈɛɾ</form>
|
||||||
|
<form script="SAMPA">"pe:"tse:"E:r</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
<pronunciation id="34" dynamic_accentuation_id="81" type="english_spelling">
|
||||||
|
<form script="IPA">ˈpiːˈsiːˈaːɾ</form>
|
||||||
|
<form script="SAMPA">"pi:"si:"a:r</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
</pronunciationList>
|
||||||
|
<hyphenationList/>
|
||||||
|
</formRepresentations>
|
||||||
|
</wordForm>
|
||||||
|
</wordFormList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</lexicon>
|
70
examples/morphological_lexicon/Shakespeare.xml
Normal file
70
examples/morphological_lexicon/Shakespeare.xml
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
<lexicon>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma orthography_ids="342" dynamic_accentuation_ids="81" pronunciation_ids="32|33" hyphenation_ids="" tonemic_accentuation_ids="85">Shakespeare</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_Shakespeare" type="single">
|
||||||
|
<lexeme lexical_unit_lexeme_id="83224">Shakespeare</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>samostalnik</category>
|
||||||
|
<subcategory type="pronunciation">other</subcategory>
|
||||||
|
<grammarFeature name="type">proper</grammarFeature>
|
||||||
|
<grammarFeature name="gender">masculine</grammarFeature>
|
||||||
|
</grammar>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">285818</measure>
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<wordFormList>
|
||||||
|
<wordForm id="333330">
|
||||||
|
<msd language="sl" system="JOS">Slmei</msd>
|
||||||
|
<msd language="en" system="UD">PROPN|...</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="number">singular</grammarFeature>
|
||||||
|
<grammarFeature name="case">nominative</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentations>
|
||||||
|
<orthographyList>
|
||||||
|
<orthography id="342" morphologyPatterns="Sm.5.1.o">
|
||||||
|
<form>Shakespeare</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">813</measure>
|
||||||
|
<measure type="frequency" source="GOS 1.0">432</measure>
|
||||||
|
</measureList>
|
||||||
|
</orthography>
|
||||||
|
</orthographyList>
|
||||||
|
<accentuationList type="dynamic">
|
||||||
|
<accentuation id="81" orthography_id="342" accentuationPatterns="cjvt:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<accentuationList type="tonemic">
|
||||||
|
<accentuation id="85" orthography_id="342" accentuationPatterns="cjvt:no_accentuation"/>
|
||||||
|
</accentuationList>
|
||||||
|
<pronunciationList>
|
||||||
|
<pronunciation id="32" dynamic_accentuation_id="81" status="preferred">
|
||||||
|
<form script="IPA">ˈʃɛːɪkspiɾ</form>
|
||||||
|
<form script="SAMPA">"SE:Ikspir</form>
|
||||||
|
<form script="slovene_phonetization">šêjkspir</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
<pronunciation id="33" dynamic_accentuation_id="81">
|
||||||
|
<form script="IPA">ˈʃeːkspiɾ</form>
|
||||||
|
<form script="SAMPA">"Se:kspir</form>
|
||||||
|
<form script="slovene_phonetization">šékspir</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
</pronunciationList>
|
||||||
|
<hyphenationList/>
|
||||||
|
</formRepresentations>
|
||||||
|
</wordForm>
|
||||||
|
</wordFormList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</lexicon>
|
84
examples/morphological_lexicon/gostja.xml
Normal file
84
examples/morphological_lexicon/gostja.xml
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
<lexicon>
|
||||||
|
<entry>
|
||||||
|
<head>
|
||||||
|
<headword>
|
||||||
|
<lemma orthography_ids="342" dynamic_accentuation_ids="81" pronunciation_ids="32" hyphenation_ids="7|8" tonemic_accentuation_ids="85">gostja</lemma>
|
||||||
|
</headword>
|
||||||
|
<lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_gostja" type="single">
|
||||||
|
<lexeme lexical_unit_lexeme_id="83224">gostja</lexeme>
|
||||||
|
</lexicalUnit>
|
||||||
|
<grammar>
|
||||||
|
<category>samostalnik</category>
|
||||||
|
<subcategory type="pronunciation">slovene</subcategory>
|
||||||
|
<grammarFeature name="type">common</grammarFeature>
|
||||||
|
<grammarFeature name="gender">feminine</grammarFeature>
|
||||||
|
</grammar>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">285818</measure>
|
||||||
|
</measureList>
|
||||||
|
<relatedEntryList/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<wordFormList>
|
||||||
|
<wordForm id="333330">
|
||||||
|
<msd language="sl" system="JOS">Sozei</msd>
|
||||||
|
<msd language="en" system="UD">NOUN|...</msd>
|
||||||
|
<grammarFeatureList>
|
||||||
|
<grammarFeature name="number">singular</grammarFeature>
|
||||||
|
<grammarFeature name="case">nominative</grammarFeature>
|
||||||
|
</grammarFeatureList>
|
||||||
|
<formRepresentations>
|
||||||
|
<orthographyList>
|
||||||
|
<orthography id="342" morphologyPatterns="Sz.1.2.0">
|
||||||
|
<form>gostja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">813</measure>
|
||||||
|
<measure type="frequency" source="GOS 1.0">432</measure>
|
||||||
|
</measureList>
|
||||||
|
</orthography>
|
||||||
|
</orthographyList>
|
||||||
|
<accentuationList type="dynamic">
|
||||||
|
<accentuation id="81" orthography_id="342" accentuationPatterns="cjvt:AP.1|zrc:S202b" status="preferred">
|
||||||
|
<form>gôstja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="Gigafida 2.0">77</measure>
|
||||||
|
</measureList>
|
||||||
|
</accentuation>
|
||||||
|
</accentuationList>
|
||||||
|
<accentuationList type="tonemic">
|
||||||
|
<accentuation id="85" orthography_id="342" accentuationPatterns="zrc:S202b|zrc:S202i" status="preferred">
|
||||||
|
<form>góstja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</accentuation>
|
||||||
|
</accentuationList>
|
||||||
|
<pronunciationList>
|
||||||
|
<pronunciation id="32" dynamic_accentuation_id="81" tonemic_accentuation_id="85">
|
||||||
|
<form script="IPA">ˈɡɔːstja</form>
|
||||||
|
<form script="SAMPA">"gO:stja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</pronunciation>
|
||||||
|
</pronunciationList>
|
||||||
|
<hyphenationList>
|
||||||
|
<hyphenation id="7" orthography_id="342" status="preferred">
|
||||||
|
<form>go|stja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</hyphenation>
|
||||||
|
<hyphenation id="8" orthography_id="342">
|
||||||
|
<form>gos|tja</form>
|
||||||
|
<measureList>
|
||||||
|
<measure type="frequency" source="XXX">7777</measure>
|
||||||
|
</measureList>
|
||||||
|
</hyphenation>
|
||||||
|
</hyphenationList>
|
||||||
|
</formRepresentations>
|
||||||
|
</wordForm>
|
||||||
|
</wordFormList>
|
||||||
|
</body>
|
||||||
|
</entry>
|
||||||
|
</lexicon>
|
|
@ -1,158 +0,0 @@
|
||||||
<lexicon>
|
|
||||||
<entry>
|
|
||||||
<head>
|
|
||||||
<headword>
|
|
||||||
<lemma orthography_ids="342" dynamic_accentuation_ids="81|83" pronunciation_ids="32" hyphenation_ids="" tonemic_accentuation_ids="85">agencija</lemma>
|
|
||||||
</headword>
|
|
||||||
<lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_megla" type="single">
|
|
||||||
<lexeme lexical_unit_lexeme_id="83224">megla</lexeme>
|
|
||||||
</lexicalUnit>
|
|
||||||
<grammar>
|
|
||||||
<category>samostalnik</category>
|
|
||||||
<grammarFeature name="type">common</grammarFeature>
|
|
||||||
<grammarFeature name="gender">feminine</grammarFeature>
|
|
||||||
<!--grammarFeature name="pattern">Sz.1.2.o</grammarFeature-->
|
|
||||||
</grammar>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="Gigafida 2.0">285818</measure>
|
|
||||||
</measureList>
|
|
||||||
<relatedEntryList/>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<wordFormList>
|
|
||||||
<wordForm id="333330">
|
|
||||||
<msd language="sl" system="JOS">Sozdr</msd>
|
|
||||||
<msd language="en" system="UD">AUX|...</msd>
|
|
||||||
<grammarFeatureList>
|
|
||||||
<grammarFeature name="number">dual</grammarFeature>
|
|
||||||
<grammarFeature name="case">genitive</grammarFeature>
|
|
||||||
</grammarFeatureList>
|
|
||||||
<formRepresentations>
|
|
||||||
<orthographyList>
|
|
||||||
|
|
||||||
<orthography id="342" morphologyPatterns="MP.1" status="preferred">
|
|
||||||
<form>megel</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="Gigafida 2.0">813</measure>
|
|
||||||
<measure type="frequency" source="GOS 1.0">432</measure>
|
|
||||||
</measureList>
|
|
||||||
</orthography>
|
|
||||||
<orthography id="343" morphologyPatterns="cjvt:MP.2">
|
|
||||||
<form>megla</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="Gigafida 2.0">9999</measure>
|
|
||||||
<measure type="frequency" source="GOS 1.0">888</measure>
|
|
||||||
</measureList>
|
|
||||||
</orthography>
|
|
||||||
</orthographyList>
|
|
||||||
<!-- Pri accentuantion in tonemic gre v resnici za dva načina naglaševanja, ali dovoljujemo potem povsem ločena elementa "tonemic" in "accentuation"? Če hočemo, da je vse pod "accentuation" in se razlikujeta po "type", potem je problem v glavi, kjer pri lemi navajamo npr. "accentuation_id", ki bi se potem moral pojaviti dvakrat? -->
|
|
||||||
<!-- Pri nekaterih vzorcih se stvari zelo ponavljajo; pri ZRC-ju npr. ločujejo meglA in mEgla -->
|
|
||||||
<accentuationList type="dynamic">
|
|
||||||
<accentuation id="81" orthography_id="342" stressPatterns="cjvt:AP.1|zrc:S202b" status="preferred">
|
|
||||||
<form>mègel</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</accentuation>
|
|
||||||
<accentuation id="82" orthography_id="343" stressPatterns="AP.1">
|
|
||||||
<form>meglá</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</accentuation>
|
|
||||||
<accentuation id="83" orthography_id="342" stressPatterns="AP.2" status="preferred">
|
|
||||||
<form>mègel</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</accentuation>
|
|
||||||
<accentuation id="84" orthography_id="343" stressPatterns="AP.2">
|
|
||||||
<form>meglá</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</accentuation>
|
|
||||||
</accentuationList>
|
|
||||||
<accentuationList type="tonemic">
|
|
||||||
<!-- tonemicPatterns je pri ZRC-ju v resnici združen z oblikoslovnim in naglasnim vzorcem, kar je mogoče problematično, saj bi ga v Sloleksu potem morali navesti tako pod orthography kot pod accentuation/tonemic. -->
|
|
||||||
<accentuation id="85" orthography_id="342" stressPatterns="S202b|S202i" status="preferred">
|
|
||||||
<form>mȅgel</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</accentuation>
|
|
||||||
<accentuation id="86" orthography_id="343" stressPatterns="S202b">
|
|
||||||
<form>meglā</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</accentuation>
|
|
||||||
<accentuation id="87" orthography_id="342" stressPatterns="S202i">
|
|
||||||
<form>mȅgel</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</accentuation>
|
|
||||||
<accentuation id="88" orthography_id="343" stressPatterns="S202i">
|
|
||||||
<form>meglā</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</accentuation>
|
|
||||||
</accentuationList>
|
|
||||||
<pronunciationList>
|
|
||||||
<pronunciation id="32" dynamic_accentuation_id="81" tonemic_accentuation_id="85" status="preferred">
|
|
||||||
<form script="IPA">mɛˈɡel</form>
|
|
||||||
<form script="SAMPA">mE"ɡel</form>
|
|
||||||
<measureList><!-- Ali obstaja več measureListov, po eden za vsak form? Ali pa povezujemo vsak measure z ustreznim form? -->
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</pronunciation>
|
|
||||||
<pronunciation id="33" dynamic_accentuation_id="82" tonemic_accentuation_id="86">
|
|
||||||
<form script="IPA">mɛˈɡla</form>
|
|
||||||
<form script="SAMPA">mE"ɡla</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</pronunciation>
|
|
||||||
<pronunciation id="34" dynamic_accentuation_id="82" norm="non-standard">
|
|
||||||
<form script="IPA">mɛˈγla</form>
|
|
||||||
<form script="SAMPA">mE"γla</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</pronunciation>
|
|
||||||
<pronunciation id="35" dynamic_accentuation_id="81" norm="non-standard">
|
|
||||||
<form script="IPA">mɛˈγel</form>
|
|
||||||
<form script="SAMPA">mE"γel</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</pronunciation>
|
|
||||||
</pronunciationList>
|
|
||||||
<hyphenationList>
|
|
||||||
<hyphenation id="7" orthography_id="342">
|
|
||||||
<form>me|gel</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</hyphenation>
|
|
||||||
<hyphenation id="8" orthography_id="81">
|
|
||||||
<form>mè|gel</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</hyphenation>
|
|
||||||
<hyphenation id="8" orthography_id="82">
|
|
||||||
<form>me|glá</form>
|
|
||||||
<measureList>
|
|
||||||
<measure type="frequency" source="XXX">7777</measure>
|
|
||||||
</measureList>
|
|
||||||
</hyphenation>
|
|
||||||
</hyphenationList>
|
|
||||||
</formRepresentations>
|
|
||||||
</wordForm>
|
|
||||||
</wordFormList>
|
|
||||||
</body>
|
|
||||||
</entry>
|
|
||||||
</lexicon>
|
|
|
@ -147,10 +147,19 @@
|
||||||
</xsd:restriction>
|
</xsd:restriction>
|
||||||
</xsd:simpleType>
|
</xsd:simpleType>
|
||||||
|
|
||||||
|
<xsd:complexType name="subcategoryType">
|
||||||
|
<xsd:simpleContent>
|
||||||
|
<xsd:extension base="xsd:string">
|
||||||
|
<xsd:attribute name="type" type="xsd:string"/>
|
||||||
|
</xsd:extension>
|
||||||
|
</xsd:simpleContent>
|
||||||
|
</xsd:complexType>
|
||||||
|
|
||||||
<!-- Grammatical information -->
|
<!-- Grammatical information -->
|
||||||
<xsd:complexType name="grammarType">
|
<xsd:complexType name="grammarType">
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="category" type="categoryType" minOccurs="0"/> <!-- identifies the part of speech, unless it's not a single lexeme -->
|
<xsd:element name="category" type="categoryType" minOccurs="0"/> <!-- identifies the part of speech, unless it's not a single lexeme -->
|
||||||
|
<xsd:element name="subcategory" type="subcategoryType" minOccurs="0" maxOccurs="unbounded"/> <!-- marks relevant subcategory information -->
|
||||||
<xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/> <!-- relevant grammatical features -->
|
<xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/> <!-- relevant grammatical features -->
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
@ -575,12 +584,12 @@
|
||||||
|
|
||||||
<xsd:complexType name="accentuationType">
|
<xsd:complexType name="accentuationType">
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="form" type="formType"/>
|
<xsd:element name="form" type="formType" minOccurs="0"/>
|
||||||
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
|
<xsd:element name="measureList" type="measureListType" minOccurs="0"/>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="id" type="xsd:integer"/>
|
<xsd:attribute name="id" type="xsd:integer"/>
|
||||||
<xsd:attribute name="orthography_id" type="xsd:integer"/>
|
<xsd:attribute name="orthography_id" type="xsd:integer"/>
|
||||||
<xsd:attribute name="stressPatterns" type="xsd:string"/>
|
<xsd:attribute name="accentuationPatterns" type="xsd:string"/>
|
||||||
<xsd:attribute name="status" type="xsd:string"/>
|
<xsd:attribute name="status" type="xsd:string"/>
|
||||||
<xsd:attribute name="norm" type="xsd:string"/>
|
<xsd:attribute name="norm" type="xsd:string"/>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
@ -621,6 +630,7 @@
|
||||||
<xsd:attribute name="tonemic_accentuation_id" type="xsd:integer"/>
|
<xsd:attribute name="tonemic_accentuation_id" type="xsd:integer"/>
|
||||||
<xsd:attribute name="status" type="xsd:string"/>
|
<xsd:attribute name="status" type="xsd:string"/>
|
||||||
<xsd:attribute name="norm" type="xsd:string"/>
|
<xsd:attribute name="norm" type="xsd:string"/>
|
||||||
|
<xsd:attribute name="type" type="xsd:string"/>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
<xsd:complexType name="pronunciationListType">
|
<xsd:complexType name="pronunciationListType">
|
||||||
|
@ -636,11 +646,12 @@
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
<xsd:attribute name="id" type="xsd:integer"/>
|
<xsd:attribute name="id" type="xsd:integer"/>
|
||||||
<xsd:attribute name="orthography_id" type="xsd:integer"/>
|
<xsd:attribute name="orthography_id" type="xsd:integer"/>
|
||||||
|
<xsd:attribute name="status" type="xsd:string"/>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
<xsd:complexType name="hyphenationListType">
|
<xsd:complexType name="hyphenationListType">
|
||||||
<xsd:sequence>
|
<xsd:sequence>
|
||||||
<xsd:element name="hyphenation" type="hyphenationType" maxOccurs="unbounded"/>
|
<xsd:element name="hyphenation" type="hyphenationType" minOccurs="0" maxOccurs="unbounded"/>
|
||||||
</xsd:sequence>
|
</xsd:sequence>
|
||||||
</xsd:complexType>
|
</xsd:complexType>
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user