IssueID #1224: replaced example for morphological lexicon
This commit is contained in:
		
							parent
							
								
									8a1c1bc812
								
							
						
					
					
						commit
						68f5d8cbd5
					
				
							
								
								
									
										158
									
								
								examples/morphological_lexicon/megla.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										158
									
								
								examples/morphological_lexicon/megla.xml
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,158 @@
 | 
			
		||||
<lexicon>
 | 
			
		||||
  <entry>
 | 
			
		||||
    <head>
 | 
			
		||||
      <headword>
 | 
			
		||||
	<!-- Headword lemma; the *_id attributes point at the representations of the form "megla" (orthography 343) listed under formRepresentations. -->
	<!-- NOTE(review): hyphenation_id="99" matches no hyphenation element in this entry; confirm the intended target. -->
	<lemma orthography_id="343" dynamic_accentuation_id="82" pronunciation_id="33" hyphenation_id="99" tonemic_accentuation_id="86">megla</lemma>
 | 
			
		||||
      </headword>
 | 
			
		||||
      <lexicalUnit sloleksId="LE_c77074df05d2719216d477bd01d418ad" sloleksKey="S_megla" type="single">
 | 
			
		||||
	<lexeme lexical_unit_lexeme_id="83224">megla</lexeme>
 | 
			
		||||
      </lexicalUnit>
 | 
			
		||||
      <grammar>
 | 
			
		||||
	<category>samostalnik</category>
 | 
			
		||||
	<grammarFeature name="type">common</grammarFeature>
 | 
			
		||||
	<grammarFeature name="gender">feminine</grammarFeature>
 | 
			
		||||
	<!--grammarFeature name="pattern">Sz.1.2.o</grammarFeature-->
 | 
			
		||||
      </grammar>
 | 
			
		||||
      <measureList>
 | 
			
		||||
	<measure type="frequency" source="Gigafida 2.0">285818</measure>
 | 
			
		||||
      </measureList>
 | 
			
		||||
      <relatedEntryList/>
 | 
			
		||||
    </head>
 | 
			
		||||
    <body>
 | 
			
		||||
      <wordFormList>
 | 
			
		||||
		<wordForm id="333330">
 | 
			
		||||
		  <msd language="sl" system="JOS">Sozdr</msd>
 | 
			
		||||
		  <!-- UD tag mirrors the JOS tag and the grammar features of this word form: common feminine noun, genitive dual. -->
		  <msd language="en" system="UD">NOUN|Case=Gen|Gender=Fem|Number=Dual</msd>
 | 
			
		||||
		  <grammarFeatureList>
 | 
			
		||||
			<grammarFeature name="number">dual</grammarFeature>
 | 
			
		||||
			<grammarFeature name="case">genitive</grammarFeature>
 | 
			
		||||
		  </grammarFeatureList>
 | 
			
		||||
		  <formRepresentations>
 | 
			
		||||
			<orthographyList>
 | 
			
		||||
			
 | 
			
		||||
			  <orthography id="342" morphologyPatterns="MP.1" status="preferred">
 | 
			
		||||
				<form>megel</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="Gigafida 2.0">813</measure>
 | 
			
		||||
				  <measure type="frequency" source="GOS 1.0">432</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </orthography>
 | 
			
		||||
			  <orthography id="343" morphologyPatterns="cjvt:MP.2">
 | 
			
		||||
				<form>megla</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="Gigafida 2.0">9999</measure>
 | 
			
		||||
				  <measure type="frequency" source="GOS 1.0">888</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </orthography>
 | 
			
		||||
			</orthographyList>
 | 
			
		||||
			<!-- Accentuation and tonemic are really two ways of marking accent; do we then allow entirely separate "tonemic" and "accentuation" elements? If we want everything under "accentuation", distinguished by "type", there is a problem in the head, where the lemma carries e.g. an "accentuation_id" that would then have to appear twice? -->
 | 
			
		||||
			<!-- With some patterns things are highly repetitive; ZRC, for example, distinguishes meglA and mEgla. -->
 | 
			
		||||
			<accentuationList type="dynamic">
 | 
			
		||||
			  <accentuation id="81" orthography_id="342" stressPatterns="cjvt:AP.1|zrc:S202b" status="preferred">
 | 
			
		||||
				<form>mègel</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </accentuation>
 | 
			
		||||
			  <accentuation id="82" orthography_id="343" stressPatterns="AP.1">
 | 
			
		||||
				<form>meglá</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </accentuation>
 | 
			
		||||
			  <accentuation id="83" orthography_id="342" stressPatterns="AP.2" status="preferred">
 | 
			
		||||
				<form>mègel</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </accentuation>
 | 
			
		||||
			  <accentuation id="84" orthography_id="343" stressPatterns="AP.2">
 | 
			
		||||
				<form>meglá</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </accentuation>
 | 
			
		||||
			</accentuationList>
 | 
			
		||||
			<accentuationList type="tonemic">
 | 
			
		||||
			  <!-- At ZRC, tonemicPatterns is in fact merged with the morphological and accentual pattern, which may be problematic, since in Sloleks we would then have to state it both under orthography and under accentuation/tonemic. -->
 | 
			
		||||
			  <accentuation id="85" orthography_id="342" stressPatterns="S202b|S202i" status="preferred">
 | 
			
		||||
				<form>mȅgel</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </accentuation>
 | 
			
		||||
			  <accentuation id="86" orthography_id="343" stressPatterns="S202b">
 | 
			
		||||
				<form>meglā</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </accentuation>
 | 
			
		||||
			  <accentuation id="87" orthography_id="342" stressPatterns="S202i">
 | 
			
		||||
				<form>mȅgel</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </accentuation>
 | 
			
		||||
			  <accentuation id="88" orthography_id="343" stressPatterns="S202i">
 | 
			
		||||
				<form>meglā</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </accentuation>
 | 
			
		||||
			</accentuationList>
 | 
			
		||||
			<pronunciationList>
 | 
			
		||||
			  <pronunciation id="32" dynamic_accentuation_id="81" tonemic_accentuation_id="85" status="preferred">
 | 
			
		||||
				<form script="IPA">mɛˈɡel</form>
 | 
			
		||||
				<!-- SAMPA is ASCII-only: plain "g", not the IPA letter U+0261. -->
				<form script="SAMPA">mE"gel</form>
 | 
			
		||||
				<measureList><!-- Are there several measureLists, one per form? Or do we link each measure to its corresponding form? -->
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </pronunciation>
 | 
			
		||||
			  <pronunciation id="33" dynamic_accentuation_id="82" tonemic_accentuation_id="86">
 | 
			
		||||
				<form script="IPA">mɛˈɡla</form>
 | 
			
		||||
				<!-- SAMPA is ASCII-only: plain "g", not the IPA letter U+0261. -->
				<form script="SAMPA">mE"gla</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </pronunciation>
 | 
			
		||||
			  <pronunciation id="34" dynamic_accentuation_id="82" norm="non-standard">
 | 
			
		||||
				<!-- Voiced velar fricative written with the IPA letter U+0263, not Greek gamma U+03B3. -->
				<form script="IPA">mɛˈɣla</form>
 | 
			
		||||
				<!-- X-SAMPA writes the voiced velar fricative as ASCII "G". -->
				<form script="SAMPA">mE"Gla</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </pronunciation>
 | 
			
		||||
			  <pronunciation id="35" dynamic_accentuation_id="81" norm="non-standard">
 | 
			
		||||
				<!-- Voiced velar fricative written with the IPA letter U+0263, not Greek gamma U+03B3. -->
				<form script="IPA">mɛˈɣel</form>
 | 
			
		||||
				<!-- X-SAMPA writes the voiced velar fricative as ASCII "G". -->
				<form script="SAMPA">mE"Gel</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </pronunciation>
 | 
			
		||||
			</pronunciationList>
 | 
			
		||||
			<hyphenationList>
 | 
			
		||||
			  <hyphenation id="7" orthography_id="342">
 | 
			
		||||
				<form>me|gel</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
		      </hyphenation>
 | 
			
		||||
			  <!-- Hyphenation of the accented form: 81 is the id of the dynamic accentuation "mègel", not of an orthography. -->
			  <hyphenation id="8" dynamic_accentuation_id="81">
 | 
			
		||||
				<form>mè|gel</form>
 | 
			
		||||
				<measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
		      </hyphenation>
 | 
			
		||||
			  <!-- Given its own id (8 is already used by the preceding hyphenation); 82 is the id of the dynamic accentuation "meglá", not of an orthography. -->
			  <hyphenation id="9" dynamic_accentuation_id="82">
 | 
			
		||||
				<form>me|glá</form>
 | 
			
		||||
			    <measureList>
 | 
			
		||||
				  <measure type="frequency" source="XXX">7777</measure>
 | 
			
		||||
				</measureList>
 | 
			
		||||
			  </hyphenation>
 | 
			
		||||
			</hyphenationList>	
 | 
			
		||||
		  </formRepresentations>
 | 
			
		||||
	    </wordForm>
 | 
			
		||||
      </wordFormList>
 | 
			
		||||
    </body>
 | 
			
		||||
  </entry>
 | 
			
		||||
</lexicon>
 | 
			
		||||
@ -1,55 +0,0 @@
 | 
			
		||||
<lexicon>
 | 
			
		||||
  <entry>
 | 
			
		||||
    <head>
 | 
			
		||||
      <headword>
 | 
			
		||||
        <lemma>klop</lemma>
 | 
			
		||||
      </headword>
 | 
			
		||||
      <lexicalUnit id="123456" type="single">
 | 
			
		||||
        <lexeme lexical_unit_lexeme_id="654321">klop</lexeme>
 | 
			
		||||
      </lexicalUnit>
 | 
			
		||||
      <grammar>
 | 
			
		||||
        <category>samostalnik</category>
 | 
			
		||||
        <grammarFeature name="type">common</grammarFeature>
 | 
			
		||||
        <grammarFeature name="gender">feminine</grammarFeature>
 | 
			
		||||
        <grammarFeature name="pattern">Sz.1.2.o</grammarFeature>
 | 
			
		||||
      </grammar>
 | 
			
		||||
      <measureList>
 | 
			
		||||
        <measure type="frequency" source="Gigafida 2.0">999999</measure>
 | 
			
		||||
      </measureList>
 | 
			
		||||
    </head>
 | 
			
		||||
    <body>
 | 
			
		||||
      <wordFormList>
 | 
			
		||||
        <wordForm id="333333">
 | 
			
		||||
          <msd language="sl" system="JOS">Sozim</msd>
 | 
			
		||||
          <grammarFeatureList>
 | 
			
		||||
            <grammarFeature name="case">nominative</grammarFeature>
 | 
			
		||||
            <grammarFeature name="number">plural</grammarFeature>
 | 
			
		||||
          </grammarFeatureList>
 | 
			
		||||
          <formRepresentationList>
 | 
			
		||||
            <formRepresentation id="4444444">
 | 
			
		||||
              <measureList>
 | 
			
		||||
                <measure type="frequency" source="Gigafida 2.0">222222</measure>
 | 
			
		||||
              </measureList>
 | 
			
		||||
              <form>klopi</form>
 | 
			
		||||
            </formRepresentation>
 | 
			
		||||
          </formRepresentationList>
 | 
			
		||||
        </wordForm>
 | 
			
		||||
        <wordForm id="333334">
 | 
			
		||||
          <msd language="sl" system="JOS">Sozdm</msd>
 | 
			
		||||
          <grammarFeatureList>
 | 
			
		||||
            <grammarFeature name="case">dative</grammarFeature>
 | 
			
		||||
            <grammarFeature name="number">plural</grammarFeature>
 | 
			
		||||
          </grammarFeatureList>
 | 
			
		||||
          <formRepresentationList>
 | 
			
		||||
            <formRepresentation id="4444445">
 | 
			
		||||
              <measureList>
 | 
			
		||||
                <measure type="frequency" source="Gigafida 2.0">11111</measure>
 | 
			
		||||
              </measureList>
 | 
			
		||||
              <form>klopem</form>
 | 
			
		||||
            </formRepresentation>
 | 
			
		||||
          </formRepresentationList>
 | 
			
		||||
        </wordForm>
 | 
			
		||||
      </wordFormList>
 | 
			
		||||
    </body>
 | 
			
		||||
  </entry>
 | 
			
		||||
</lexicon>
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user