cjvt-srl-tagging/data_format.xml

180 lines
15 KiB
XML
Raw Permalink Normal View History

2019-02-10 22:23:24 +00:00
# sl.test.mate
Task: parse Kres data into an input format for the tagger.
CoNLL-2009 format: https://wiki.ufal.ms.mff.cuni.cz/format-conll
id form lemma plemma pos ppos feat pfeat head phead deprel pdeprel fillpred pred apred1 apred2 apred3 ...
1 Prispelo prispeti prispeti V V Verb|main|perfective|participle|singular|neuter Verb|main|perfective|participle|singular|neuter 0 0 modra modra Y prispeti _ _ _ _ _ _ _ _ _
2 je biti biti V V Verb|auxiliary|present|third|singular|-Negative Verb|auxiliary|present|third|singular|-Negative 1 1 del del _ _ _ _ _ _ _ _ _ _ _
3 skoraj skoraj skoraj L L Particle Particle 4 4 dol dol _ _ _ _ _ _ _ _ _ _ _
4 60 60 60 K K Numeral|digit|cardinal Numeral|digit|cardinal 5 5 dol dol _ _ _ _ _ _ _ _ _ _ _
5 izdelkov izdelek izdelek S S Noun|common|masculine|plural|genitive Noun|common|masculine|plural|genitive 1 1 ena ena _ _ ACT _ _ _ _ _ _ _ _
6 osnovnošolcev osnovnošolec osnovnošolec S S Noun|common|masculine|plural|genitive Noun|common|masculine|plural|genitive 5 5 dol dol _ _ _ _ _ _ _ _ _ _ _
7 iz iz iz D D Adposition|genitive Adposition|genitive 9 9 dol dol _ _ _ _ _ _ _ _ _ _ _
8 12 12 12 K K Numeral|digit|cardinal Numeral|digit|cardinal 9 9 dol dol _ _ _ _ _ _ _ _ _ _ _
9 šol šola šola S S Noun|common|feminine|plural|genitive Noun|common|feminine|plural|genitive 6 6 dol dol _ _ _ _ _ _ _ _ _ _ _
10 širšega širok širok P P Adjective|general|comparative|neuter|singular|genitive Adjective|general|comparative|neuter|singular|genitive 12 12 dol dol _ _ _ _ _ _ _ _ _ _ _
11 ptujskega ptujski ptujski P P Adjective|general|positive|neuter|singular|genitive Adjective|general|positive|neuter|singular|genitive 12 12 dol dol _ _ _ _ _ _ _ _ _ _ _
12 območja območje območje S S Noun|common|neuter|singular|genitive Noun|common|neuter|singular|genitive 9 9 dol dol _ _ _ _ _ _ _ _ _ _ _
13 . . . . . . . 0 0 modra modra _ _ _ _ _ _ _ _ _ _ _
<s xml:id="ssj187.1237.4493">
<w ana="msd:Ggdd-es" lemma="prispeti" xml:id="ssj187.1237.4493.t1">Prispelo</w>
<c> </c>
<w ana="msd:Gp-ste-n" lemma="biti" xml:id="ssj187.1237.4493.t2">je</w>
<c> </c>
<w ana="msd:L" lemma="skoraj" xml:id="ssj187.1237.4493.t3">skoraj</w>
<c> </c>
<w ana="msd:Kag" lemma="60" xml:id="ssj187.1237.4493.t4">60</w>
<c> </c>
<w ana="msd:Sommr" lemma="izdelek" xml:id="ssj187.1237.4493.t5">izdelkov</w>
<c> </c>
<w ana="msd:Sommr" lemma="osnovnošolec" xml:id="ssj187.1237.4493.t6">osnovnošolcev</w>
<c> </c>
<w ana="msd:Dr" lemma="iz" xml:id="ssj187.1237.4493.t7">iz</w>
<c> </c>
<w ana="msd:Kag" lemma="12" xml:id="ssj187.1237.4493.t8">12</w>
<c> </c>
<w ana="msd:Sozmr" lemma="šola" xml:id="ssj187.1237.4493.t9">šol</w>
<c> </c>
<w ana="msd:Pppser" lemma="širok" xml:id="ssj187.1237.4493.t10">širšega</w>
<c> </c>
<w ana="msd:Ppnser" lemma="ptujski" xml:id="ssj187.1237.4493.t11">ptujskega</w>
<c> </c>
<w ana="msd:Soser" lemma="območje" xml:id="ssj187.1237.4493.t12">območja</w>
<pc ana="msd:U" xml:id="ssj187.1237.4493.t13">.</pc>
<linkGrp corresp="#ssj187.1237.4493" targFunc="head argument" type="syntax">
<link ana="syn:modra" target="#ssj187.1237.4493 #ssj187.1237.4493.t1"/>
<link ana="syn:del" target="#ssj187.1237.4493.t1 #ssj187.1237.4493.t2"/>
<link ana="syn:dol" target="#ssj187.1237.4493.t4 #ssj187.1237.4493.t3"/>
<link ana="syn:dol" target="#ssj187.1237.4493.t5 #ssj187.1237.4493.t4"/>
<link ana="syn:ena" target="#ssj187.1237.4493.t1 #ssj187.1237.4493.t5"/>
<link ana="syn:dol" target="#ssj187.1237.4493.t5 #ssj187.1237.4493.t6"/>
<link ana="syn:dol" target="#ssj187.1237.4493.t9 #ssj187.1237.4493.t7"/>
<link ana="syn:dol" target="#ssj187.1237.4493.t9 #ssj187.1237.4493.t8"/>
<link ana="syn:dol" target="#ssj187.1237.4493.t6 #ssj187.1237.4493.t9"/>
<link ana="syn:dol" target="#ssj187.1237.4493.t12 #ssj187.1237.4493.t10"/>
<link ana="syn:dol" target="#ssj187.1237.4493.t12 #ssj187.1237.4493.t11"/>
<link ana="syn:dol" target="#ssj187.1237.4493.t9 #ssj187.1237.4493.t12"/>
<link ana="syn:modra" target="#ssj187.1237.4493 #ssj187.1237.4493.t13"/>
</linkGrp>
<linkGrp corresp="#ssj187.1237.4493" targFunc="head argument" type="SRL">
<link ana="srl:ACT" target="#ssj187.1237.4493.t1 #ssj187.1237.4493.t5"/>
</linkGrp>
</s>
1 Izbrana izbran izbran P P Adjective|participle|positive|neuter|plural|accusative Adjective|participle|positive|neuter|plural|accusative 2 2 dol dol _ _ _ _ _ _ _ _ _ _ _
2 dela delo delo S S Noun|common|neuter|plural|accusative Noun|common|neuter|plural|accusative 4 4 dve dve _ _ PAT _ _ _ _ _ _ _ _
3 so biti biti V V Verb|auxiliary|present|third|plural|-Negative Verb|auxiliary|present|third|plural|-Negative 4 4 del del _ _ _ _ _ _ _ _ _ _ _
4 razstavili razstaviti razstaviti V V Verb|main|perfective|participle|plural|masculine Verb|main|perfective|participle|plural|masculine 0 0 modra modra Y razstaviti _ _ _ _ _ _ _ _ _
5 v v v D D Adposition|locative Adposition|locative 7 7 dol dol _ _ _ _ _ _ _ _ _ _ _
6 vhodni vhoden vhoden P P Adjective|general|positive|feminine|singular|locative Adjective|general|positive|feminine|singular|locative 7 7 dol dol _ _ _ _ _ _ _ _ _ _ _
7 avli avla avla S S Noun|common|feminine|singular|locative Noun|common|feminine|singular|locative 4 4 štiri štiri _ _ LOC _ _ _ _ _ _ _ _
8 upravne upraven upraven P P Adjective|general|positive|feminine|singular|genitive Adjective|general|positive|feminine|singular|genitive 9 9 dol dol _ _ _ _ _ _ _ _ _ _ _
9 stavbe stavba stavba S S Noun|common|feminine|singular|genitive Noun|common|feminine|singular|genitive 7 7 dol dol _ _ _ _ _ _ _ _ _ _ _
10 PP PP PP S S Noun|proper|feminine|singular|genitive Noun|proper|feminine|singular|genitive 9 9 dol dol _ _ _ _ _ _ _ _ _ _ _
11 in in in V V Conjunction|coordinating Conjunction|coordinating 17 17 vez vez _ _ _ _ _ _ _ _ _ _ _
12 tja tja tja R R Adverb|general|positive Adverb|general|positive 17 17 štiri štiri _ _ _ GOAL _ _ _ _ _ _ _
13 konec konec konec D D Adposition|genitive Adposition|genitive 14 14 dol dol _ _ _ _ _ _ _ _ _ _ _
14 novembra november november S S Noun|common|masculine|singular|genitive Noun|common|masculine|singular|genitive 17 17 štiri štiri _ _ _ TIME _ _ _ _ _ _ _
15 na na na D D Adposition|accusative Adposition|accusative 16 16 dol dol _ _ _ _ _ _ _ _ _ _ _
16 otvoritev otvoritev otvoritev S S Noun|common|feminine|singular|accusative Noun|common|feminine|singular|accusative 17 17 štiri štiri _ _ _ AIM _ _ _ _ _ _ _
17 povabili povabiti povabiti V V Verb|main|perfective|participle|plural|masculine Verb|main|perfective|participle|plural|masculine 0 0 modra modra Y povabiti _ _ _ _ _ _ _ _ _
18 male mali mali P P Adjective|general|positive|masculine|plural|accusative Adjective|general|positive|masculine|plural|accusative 19 19 dol dol _ _ _ _ _ _ _ _ _ _ _
19 risarje risar risar S S Noun|common|masculine|plural|accusative Noun|common|masculine|plural|accusative 17 17 dve dve _ _ _ PAT _ _ _ _ _ _ _
20 , , , , , , , 22 22 vez vez _ _ _ _ _ _ _ _ _ _ _
21 njihove njihov njihov Z Z Pronoun|possessive|third|masculine|plural|accusative|plural Pronoun|possessive|third|masculine|plural|accusative|plural 22 22 dol dol _ _ _ _ _ _ _ _ _ _ _
22 starše starš starš S S Noun|common|masculine|plural|accusative Noun|common|masculine|plural|accusative 19 19 prir prir _ _ _ _ _ _ _ _ _ _ _
23 in in in V V Conjunction|coordinating Conjunction|coordinating 24 24 vez vez _ _ _ _ _ _ _ _ _ _ _
24 učitelje učitelj učitelj S S Noun|common|masculine|plural|accusative Noun|common|masculine|plural|accusative 19 19 prir prir _ _ _ _ _ _ _ _ _ _ _
25 . . . . . . . 0 0 modra modra _ _ _ _ _ _ _ _ _ _ _
<s xml:id="ssj187.1237.4495">
<w ana="msd:Pdnsmt" lemma="izbran" xml:id="ssj187.1237.4495.t1">Izbrana</w>
<c> </c>
<w ana="msd:Sosmt" lemma="delo" xml:id="ssj187.1237.4495.t2">dela</w>
<c> </c>
<w ana="msd:Gp-stm-n" lemma="biti" xml:id="ssj187.1237.4495.t3">so</w>
<c> </c>
<w ana="msd:Ggdd-mm" lemma="razstaviti" xml:id="ssj187.1237.4495.t4">razstavili</w>
<c> </c>
<w ana="msd:Dm" lemma="v" xml:id="ssj187.1237.4495.t5">v</w>
<c> </c>
<w ana="msd:Ppnzem" lemma="vhoden" xml:id="ssj187.1237.4495.t6">vhodni</w>
<c> </c>
<w ana="msd:Sozem" lemma="avla" xml:id="ssj187.1237.4495.t7">avli</w>
<c> </c>
<w ana="msd:Ppnzer" lemma="upraven" xml:id="ssj187.1237.4495.t8">upravne</w>
<c> </c>
<w ana="msd:Sozer" lemma="stavba" xml:id="ssj187.1237.4495.t9">stavbe</w>
<c> </c>
<seg subtype="org" type="name">
<w ana="msd:Slzer" lemma="PP" xml:id="ssj187.1237.4495.t10">PP</w>
</seg>
<c> </c>
<w ana="msd:Vp" lemma="in" xml:id="ssj187.1237.4495.t11">in</w>
<c> </c>
<w ana="msd:Rsn" lemma="tja" xml:id="ssj187.1237.4495.t12">tja</w>
<c> </c>
<w ana="msd:Dr" lemma="konec" xml:id="ssj187.1237.4495.t13">konec</w>
<c> </c>
<w ana="msd:Somer" lemma="november" xml:id="ssj187.1237.4495.t14">novembra</w>
<c> </c>
<w ana="msd:Dt" lemma="na" xml:id="ssj187.1237.4495.t15">na</w>
<c> </c>
<w ana="msd:Sozet" lemma="otvoritev" xml:id="ssj187.1237.4495.t16">otvoritev</w>
<c> </c>
<w ana="msd:Ggdd-mm" lemma="povabiti" xml:id="ssj187.1237.4495.t17">povabili</w>
<c> </c>
<w ana="msd:Ppnmmt" lemma="mali" xml:id="ssj187.1237.4495.t18">male</w>
<c> </c>
<w ana="msd:Sommt" lemma="risar" xml:id="ssj187.1237.4495.t19">risarje</w>
<pc ana="msd:U" xml:id="ssj187.1237.4495.t20">,</pc>
<c> </c>
<w ana="msd:Zstmmtm" lemma="njihov" xml:id="ssj187.1237.4495.t21">njihove</w>
<c> </c>
<w ana="msd:Sommt" lemma="starš" xml:id="ssj187.1237.4495.t22">starše</w>
<c> </c>
<w ana="msd:Vp" lemma="in" xml:id="ssj187.1237.4495.t23">in</w>
<c> </c>
<w ana="msd:Sommt" lemma="učitelj" xml:id="ssj187.1237.4495.t24">učitelje</w>
<pc ana="msd:U" xml:id="ssj187.1237.4495.t25">.</pc>
<linkGrp corresp="#ssj187.1237.4495" targFunc="head argument" type="syntax">
<link ana="syn:dol" target="#ssj187.1237.4495.t2 #ssj187.1237.4495.t1"/>
<link ana="syn:dve" target="#ssj187.1237.4495.t4 #ssj187.1237.4495.t2"/>
<link ana="syn:del" target="#ssj187.1237.4495.t4 #ssj187.1237.4495.t3"/>
<link ana="syn:modra" target="#ssj187.1237.4495 #ssj187.1237.4495.t4"/>
<link ana="syn:dol" target="#ssj187.1237.4495.t7 #ssj187.1237.4495.t5"/>
<link ana="syn:dol" target="#ssj187.1237.4495.t7 #ssj187.1237.4495.t6"/>
<link ana="syn:štiri" target="#ssj187.1237.4495.t4 #ssj187.1237.4495.t7"/>
<link ana="syn:dol" target="#ssj187.1237.4495.t9 #ssj187.1237.4495.t8"/>
<link ana="syn:dol" target="#ssj187.1237.4495.t7 #ssj187.1237.4495.t9"/>
<link ana="syn:dol" target="#ssj187.1237.4495.t9 #ssj187.1237.4495.t10"/>
<link ana="syn:vez" target="#ssj187.1237.4495.t17 #ssj187.1237.4495.t11"/>
<link ana="syn:štiri" target="#ssj187.1237.4495.t17 #ssj187.1237.4495.t12"/>
<link ana="syn:dol" target="#ssj187.1237.4495.t14 #ssj187.1237.4495.t13"/>
<link ana="syn:štiri" target="#ssj187.1237.4495.t17 #ssj187.1237.4495.t14"/>
<link ana="syn:dol" target="#ssj187.1237.4495.t16 #ssj187.1237.4495.t15"/>
<link ana="syn:štiri" target="#ssj187.1237.4495.t17 #ssj187.1237.4495.t16"/>
<link ana="syn:modra" target="#ssj187.1237.4495 #ssj187.1237.4495.t17"/>
<link ana="syn:dol" target="#ssj187.1237.4495.t19 #ssj187.1237.4495.t18"/>
<link ana="syn:dve" target="#ssj187.1237.4495.t17 #ssj187.1237.4495.t19"/>
<link ana="syn:vez" target="#ssj187.1237.4495.t22 #ssj187.1237.4495.t20"/>
<link ana="syn:dol" target="#ssj187.1237.4495.t22 #ssj187.1237.4495.t21"/>
<link ana="syn:prir" target="#ssj187.1237.4495.t19 #ssj187.1237.4495.t22"/>
<link ana="syn:vez" target="#ssj187.1237.4495.t24 #ssj187.1237.4495.t23"/>
<link ana="syn:prir" target="#ssj187.1237.4495.t19 #ssj187.1237.4495.t24"/>
<link ana="syn:modra" target="#ssj187.1237.4495 #ssj187.1237.4495.t25"/>
</linkGrp>
<linkGrp corresp="#ssj187.1237.4495" targFunc="head argument" type="SRL">
<link ana="srl:AIM" target="#ssj187.1237.4495.t17 #ssj187.1237.4495.t16"/>
<link ana="srl:GOAL" target="#ssj187.1237.4495.t17 #ssj187.1237.4495.t12"/>
<link ana="srl:LOC" target="#ssj187.1237.4495.t4 #ssj187.1237.4495.t7"/>
<link ana="srl:PAT" target="#ssj187.1237.4495.t17 #ssj187.1237.4495.t19"/>
<link ana="srl:PAT" target="#ssj187.1237.4495.t4 #ssj187.1237.4495.t2"/>
<link ana="srl:TIME" target="#ssj187.1237.4495.t17 #ssj187.1237.4495.t14"/>
</linkGrp>