xml_schemas/examples/morphological_lexicon/megla.xml

159 lines
6.2 KiB
XML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<lexicon>
<entry>
<!-- Entry head: lemma, lexical unit, grammatical description and corpus frequency of the entry. -->
<head>
  <headword>
    <!-- Each *_ids attribute lists ids of representations defined in the entry body
         (multiple ids are separated by "|"). The lemma text agrees with the lexeme
         and the sloleksKey of the lexical unit below. -->
    <lemma orthography_ids="342"
           dynamic_accentuation_ids="81|83"
           pronunciation_ids="32"
           hyphenation_ids=""
           tonemic_accentuation_ids="85">megla</lemma>
  </headword>
  <lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_megla" type="single">
    <lexeme lexical_unit_lexeme_id="83224">megla</lexeme>
  </lexicalUnit>
  <grammar>
    <category>samostalnik</category>
    <grammarFeature name="type">common</grammarFeature>
    <grammarFeature name="gender">feminine</grammarFeature>
    <!-- The "pattern" feature below is currently disabled. -->
    <!--grammarFeature name="pattern">Sz.1.2.o</grammarFeature-->
  </grammar>
  <measureList>
    <measure type="frequency" source="Gigafida 2.0">285818</measure>
  </measureList>
  <!-- No related entries are listed for this lexeme. -->
  <relatedEntryList/>
</head>
<body>
<wordFormList>
<wordForm id="333330">
<!-- Morphosyntactic description of this word form: one tag per tagging system,
     followed by the same information spelled out as individual grammar features. -->
<msd language="sl" system="JOS">Sozdr</msd>
<!-- The entry is a noun (see the grammar category in the head), so the UD part-of-speech
     is NOUN rather than AUX.
     NOTE(review): "..." is still a placeholder for the UD feature list; fill it in. -->
<msd language="en" system="UD">NOUN|...</msd>
<grammarFeatureList>
  <grammarFeature name="number">dual</grammarFeature>
  <grammarFeature name="case">genitive</grammarFeature>
</grammarFeatureList>
<formRepresentations>
<!-- Written (unaccented) spellings of this word form. The ids defined here are the
     targets of the orthography_id / orthography_ids references elsewhere in the entry. -->
<orthographyList>
  <!-- Preferred spelling. -->
  <orthography id="342"
               morphologyPatterns="MP.1"
               status="preferred">
    <form>megel</form>
    <measureList>
      <measure type="frequency" source="Gigafida 2.0">813</measure>
      <measure type="frequency" source="GOS 1.0">432</measure>
    </measureList>
  </orthography>
  <!-- Alternative spelling.
       NOTE(review): this pattern value carries a "cjvt:" prefix while the one above
       does not; confirm which notation is intended. -->
  <orthography id="343"
               morphologyPatterns="cjvt:MP.2">
    <form>megla</form>
    <measureList>
      <measure type="frequency" source="Gigafida 2.0">9999</measure>
      <measure type="frequency" source="GOS 1.0">888</measure>
    </measureList>
  </orthography>
</orthographyList>
<!-- Accentuation and tonemic are really two ways of marking accent; do we then allow entirely separate "tonemic" and "accentuation" elements? If we want everything under "accentuation", distinguished only by "type", then there is a problem in the head, where the lemma cites e.g. "accentuation_id", which would then have to appear twice? -->
<!-- With some patterns things repeat a great deal; at ZRC, for example, they distinguish meglA and mEgla. -->
<!-- Dynamic accentuation: accented spellings of the word form. Every item points back
     to one of the listed orthographies through orthography_id. -->
<accentuationList type="dynamic">
  <accentuation id="81"
                orthography_id="342"
                stressPatterns="cjvt:AP.1|zrc:S202b"
                status="preferred">
    <form>mègel</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </accentuation>
  <accentuation id="82"
                orthography_id="343"
                stressPatterns="AP.1">
    <form>meglá</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </accentuation>
  <accentuation id="83"
                orthography_id="342"
                stressPatterns="AP.2"
                status="preferred">
    <form>mègel</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </accentuation>
  <accentuation id="84"
                orthography_id="343"
                stressPatterns="AP.2">
    <form>meglá</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </accentuation>
</accentuationList>
<!-- Tonemic accentuation: tonemically marked spellings of the word form, each linked
     to an orthography through orthography_id. -->
<accentuationList type="tonemic">
  <!-- At ZRC, tonemicPatterns is actually merged with the morphological and accentual
       pattern, which may be problematic, since in Sloleks we would then have to state
       it under both orthography and accentuation/tonemic. -->
  <accentuation id="85"
                orthography_id="342"
                stressPatterns="S202b|S202i"
                status="preferred">
    <form>mȅgel</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </accentuation>
  <accentuation id="86"
                orthography_id="343"
                stressPatterns="S202b">
    <form>meglā</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </accentuation>
  <accentuation id="87"
                orthography_id="342"
                stressPatterns="S202i">
    <form>mȅgel</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </accentuation>
  <accentuation id="88"
                orthography_id="343"
                stressPatterns="S202i">
    <form>meglā</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </accentuation>
</accentuationList>
<!-- Pronunciations of the word form. Each item references a dynamic accentuation and,
     where given, a tonemic accentuation; every pronunciation is transcribed in both
     IPA and SAMPA.
     Character fixes: the IPA forms use the IPA letter for the voiced velar fricative
     (U+0263) instead of the look-alike Greek gamma, and the SAMPA forms use only ASCII
     symbols ("g" for the plosive, "G" for the fricative), as SAMPA requires.
     NOTE(review): the IPA forms lack the first syllable that the SAMPA forms contain;
     confirm the intended transcriptions. -->
<pronunciationList>
  <pronunciation id="32" dynamic_accentuation_id="81" tonemic_accentuation_id="85" status="preferred">
    <form script="IPA">ˈɡel</form>
    <form script="SAMPA">mE"gel</form>
    <measureList>
      <!-- Are there multiple measureLists, one for each form? Or do we link each
           measure to the corresponding form? -->
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </pronunciation>
  <pronunciation id="33" dynamic_accentuation_id="82" tonemic_accentuation_id="86">
    <form script="IPA">ˈɡla</form>
    <form script="SAMPA">mE"gla</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </pronunciation>
  <!-- Non-standard variants with a fricative in place of the plosive. -->
  <pronunciation id="34" dynamic_accentuation_id="82" norm="non-standard">
    <form script="IPA">ˈɣla</form>
    <form script="SAMPA">mE"Gla</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </pronunciation>
  <pronunciation id="35" dynamic_accentuation_id="81" norm="non-standard">
    <form script="IPA">ˈɣel</form>
    <form script="SAMPA">mE"Gel</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </pronunciation>
</pronunciationList>
<!-- Hyphenation points of the word form; "|" inside a form marks a possible break.
     Every hyphenation carries its own unique id (the last item previously reused id 8). -->
<hyphenationList>
  <hyphenation id="7" orthography_id="342">
    <form>me|gel</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </hyphenation>
  <!-- NOTE(review): the orthography_id values 81 and 82 below match accentuation ids
       (the forms are accented), not ids from the orthography list; confirm whether a
       separate accentuation reference attribute is intended here. -->
  <hyphenation id="8" orthography_id="81">
    <form>mè|gel</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </hyphenation>
  <hyphenation id="9" orthography_id="82">
    <form>me|glá</form>
    <measureList>
      <measure type="frequency" source="XXX">7777</measure>
    </measureList>
  </hyphenation>
</hyphenationList>
</formRepresentations>
</wordForm>
</wordFormList>
</body>
</entry>
</lexicon>