IssueID #937: Added basic comments for the inventory types

This commit is contained in:
Cyprian Laskowski 2020-03-04 21:27:33 +01:00
parent 660664a74d
commit ce2ffa9f71
2 changed files with 100 additions and 57 deletions

View File

@ -1,17 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:import namespace=""
schemaLocation="" />
<!-- This inventory defines types which are intended as building blocks that can and should be used in all CJVT schemas. -->
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:import namespace="" schemaLocation="" />
<!-- Status string identifying stage in the lexicographic process -->
<xsd:simpleType name="statusType">
<xsd:restriction base="xsd:string"/>
<!-- Basic form and info of a lexical unit -->
<xsd:complexType name="lemmaType">
<xsd:extension base="xsd:string">
<xsd:attribute name="audio" type="xsd:string"/>
<xsd:attribute name="type">
<xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
<xsd:attribute name="type"> <!-- one of a limited number of possible types; the default interpretation is normally "single" -->
<xsd:restriction base="xsd:string">
<xsd:enumeration value="single"/>
@ -24,16 +27,19 @@
<!-- Free form comment -->
<xsd:simpleType name="commentType">
<xsd:restriction base="xsd:string"/>
<!-- Used when needed to disambiguate between two lexical units with the same lemma -->
<xsd:complexType name="homonymyType">
<xsd:element name="homonymyFeature" type="homonymyFeatureType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Basic grammatical feature as a name-value pair -->
<xsd:complexType name="grammarFeatureType">
<xsd:extension base="xsd:string">
@ -42,6 +48,7 @@
<!-- Lemma-disambiguating feature, more open-ended than grammarFeatureType -->
<xsd:complexType name="homonymyFeatureType">
<xsd:extension base="xsd:string">
@ -50,6 +57,7 @@
<!-- Element which uniquely identifies a headword, with its basic properties and (if necessary) disambiguating features -->
<xsd:complexType name="headwordType">
<xsd:element name="lemma" type="lemmaType"/>
@ -57,34 +65,36 @@
<!-- Word component of a lexical unit -->
<xsd:complexType name="lexemeType">
<xsd:extension base="xsd:string">
<xsd:attribute name="type" type="xsd:string"/>
<xsd:attribute name="lexical_unit_lexeme_id" type="xsd:int"/>
<xsd:attribute name="sloleks" type="xsd:string"/>
<xsd:attribute name="kol" type="xsd:string"/>
<xsd:attribute name="type" type="xsd:string"/> <!-- hmm: should probably restrict possible values? currently seems to be "compound" or empty -->
<xsd:attribute name="lexical_unit_lexeme_id" type="xsd:int"/> <!-- ID within its lexical unit -->
<xsd:attribute name="sloleks" type="xsd:string"/> <!-- Sloleks ID -->
<xsd:attribute name="kol" type="xsd:string"/> <!-- hmm, legacy attribute for a collocate's lemma; why not just use @lemma? -->
<xsd:attribute name="lemma" type="xsd:string"/>
<xsd:attribute name="msd" type="xsd:string"/>
<xsd:attribute name="msd" type="xsd:string"/> <!-- msd within the Slovene JOS system -->
<!-- The lexical unit of the headword -->
<xsd:complexType name="lexicalUnitType">
<xsd:choice> <!-- the headword can be either a single lexeme or multiple components -->
<xsd:element name="lexeme" type="lexemeType"/>
<xsd:element name="component" type="componentType" minOccurs="2" maxOccurs="unbounded"/>
<xsd:attribute name="id" type="xsd:int" use="required"/>
<xsd:attribute name="type" type="xsd:string" use="required"/>
<xsd:attribute name="structure_id" type="xsd:int"/>
<xsd:attribute name="origin" type="xsd:string"/>
<xsd:attribute name="type" type="xsd:string" use="required"/> <!-- should probably restrict possible values in line with superbaza lexical_unit_type -->
<xsd:attribute name="structure_id" type="xsd:int"/> <!-- syntactic structure ID -->
<xsd:attribute name="origin" type="xsd:string"/> <!-- legacy attribute to track where the lexical unit came from -->
<!-- Slovenian morphosyntactic categories (using the JOS system) -->
<xsd:simpleType name="categoryType">
<!-- The closed set of expected categories. -->
<xsd:restriction base="xsd:string">
<xsd:enumeration value="samostalnik"/>
<xsd:enumeration value="glagol"/>
@ -101,28 +111,32 @@
<!-- Grammatical information -->
<xsd:complexType name="grammarType">
<xsd:element name="category" type="categoryType" minOccurs="0"/>
<xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/>
<xsd:element name="category" type="categoryType" minOccurs="0"/> <!-- identifies the part of speech, unless it's not a single lexeme -->
<xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/> <!-- relevant grammatical features -->
<!-- Statistical measure -->
<xsd:complexType name="measureType">
<xsd:extension base="xsd:decimal">
<xsd:attribute name="type" type="xsd:string" use="required"/>
<xsd:attribute name="source" type="xsd:string"/>
<xsd:attribute name="type" type="xsd:string" use="required"/> <!-- type of measure (e.g., frequency, logDice) -->
<xsd:attribute name="source" type="xsd:string"/> <!-- context of the measure, probably corpus name and version -->
<!-- List of measures -->
<xsd:complexType name="measureListType">
<xsd:element name="measure" type="measureType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Variant of a lexical unit -->
<xsd:complexType name="variantType">
<xsd:extension base="xsd:string">
@ -131,123 +145,136 @@
<!-- List of variants -->
<xsd:complexType name="variantListType">
<xsd:element name="variant" type="variantType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Related lexical unit -->
<xsd:complexType name="relatedEntryType">
<xsd:extension base="xsd:string">
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
<xsd:attribute name="origin" type="xsd:string"/>
<xsd:attribute name="origin" type="xsd:string"/> <!-- legacy attribute to track where the lexical unit came from -->
<!-- List of related entries -->
<xsd:complexType name="relatedEntryListType">
<xsd:element name="relatedEntry" type="relatedEntryType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Name-value pair for providing extra information -->
<xsd:complexType name="labelType">
<xsd:extension base="xsd:string">
<xsd:attribute name="feature_id" type="xsd:int"/>
<xsd:attribute name="featureId" type="xsd:string"/>
<xsd:attribute name="type" type="xsd:string"/>
<xsd:attribute name="featureId" type="xsd:string"/> <!-- an ID from an external source of the label feature -->
<xsd:attribute name="type" type="xsd:string"/> <!-- name of the label feature corresponding to its ID -->
<!-- List of labels -->
<xsd:complexType name="labelListType">
<xsd:element name="label" type="labelType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Definition of a sense -->
<xsd:complexType name="definitionType">
<xsd:extension base="xsd:string">
<xsd:attribute name="type" type="xsd:string"/>
<xsd:attribute name="type" type="xsd:string"/> <!-- hmm: what's expected here? -->
<xsd:attribute name="definition_id" type="xsd:int"/>
<!-- List of definitions -->
<xsd:complexType name="definitionListType">
<xsd:element name="definition" type="definitionType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Translation from Slovene into another language -->
<xsd:complexType name="translationType">
<xsd:extension base="xsd:string">
<xsd:attribute name="targetLang" type="xsd:string" use="required"/>
<xsd:attribute name="source" type="xsd:string"/>
<xsd:attribute name="audio" type="xsd:string"/>
<xsd:attribute name="targetLang" type="xsd:string" use="required"/> <!-- target language -->
<xsd:attribute name="source" type="xsd:string"/> <!-- where the translation comes from -->
<xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
<!-- Component of a syntactic structure, containing one of the lexemes of a lexical unit; used in non-mixed content contexts -->
<xsd:complexType name="componentType">
<xsd:element name="lexeme" type="lexemeType"/>
<xsd:attribute name="structure_id" type="xsd:int"/>
<xsd:attribute name="num" type="xsd:int"/>
<xsd:attribute name="num" type="xsd:int"/> <!-- index identifying the component's position within the syntactic structure -->
<!-- Component in a syntactic structure and lexeme in a lexical unit; used in mixed content contexts -->
<xsd:complexType name="compType">
<xsd:extension base="xsd:string">
<xsd:attribute name="structure_id" type="xsd:int"/>
<xsd:attribute name="num" type="xsd:int"/>
<xsd:attribute name="role">
<xsd:attribute name="num" type="xsd:int"/> <!-- index identifying the component's position within the syntactic structure -->
<xsd:attribute name="role"> <!-- within collocations, identifies the word's role -->
<xsd:restriction base="xsd:string">
<xsd:enumeration value="headword"/>
<xsd:enumeration value="collocate"/>
<xsd:enumeration value="other"/>
<xsd:enumeration value="other"/> <!-- used for prepositions and other extra components -->
<xsd:attribute name="sloleks" type="xsd:string"/>
<xsd:attribute name="kol" type="xsd:string"/>
<xsd:attribute name="sloleks" type="xsd:string"/> <!-- Sloleks ID -->
<xsd:attribute name="kol" type="xsd:string"/> <!-- legacy attribute for the word's lemma -->
<!-- Mixed-content container element used in examples where roles can include multiple word components (e.g., semantic roles). -->
<xsd:complexType name="exampleTreeType" mixed="true">
<xsd:element name="comp" type="compType" maxOccurs="unbounded"/>
<xsd:element name="comp" type="compType" maxOccurs="unbounded"/> <!-- the tree contains one or more components -->
<xsd:attribute name="role" type="xsd:string"/>
<xsd:attribute name="role" type="xsd:string"/> <!-- should probably be a valid value from semanticRoleType -->
<!-- Example sentence from a corpus (as opposed to a multi-word example) -->
<xsd:complexType name="corpusExampleType" mixed="true">
<xsd:choice minOccurs="0" maxOccurs="unbounded">
<xsd:choice minOccurs="0" maxOccurs="unbounded"> <!-- in most contexts, comps are sufficient, but in some contexts trees are needed -->
<xsd:element name="comp" type="compType"/>
<xsd:element name="tree" type="exampleTreeType"/>
<xsd:attribute name="corpus_id" type="xsd:int"/>
<xsd:attribute name="example_id" type="xsd:int"/>
<xsd:attribute name="exampleId" type="xsd:string"/>
<xsd:attribute name="modified" type="xsd:boolean"/>
<xsd:attribute name="exampleId" type="xsd:string"/> <!-- the example's ID within the corpus itself -->
<xsd:attribute name="modified" type="xsd:boolean"/> <!-- does the example text differ from the text in the corpus? -->
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
<xsd:attribute name="audio" type="xsd:string"/>
<xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
<!-- Multi-word example (as opposed to a corpus sentence) -->
<xsd:complexType name="multipleLexemeExampleType">
<xsd:element name="comp" type="compType" maxOccurs="unbounded"/>
<xsd:element name="comp" type="compType" maxOccurs="unbounded"/> <!-- the example consists of a sequence of components -->
<xsd:attribute name="type" use="required">
<xsd:attribute name="type" use="required"> <!-- one of a pre-defined set of types (which will probably still be expanded) -->
<xsd:restriction base="xsd:string">
<xsd:enumeration value="collocation"/>
@ -257,32 +284,35 @@
<xsd:attribute name="lexical_unit_id" type="xsd:int"/>
<xsd:attribute name="structure_id" type="xsd:int"/>
<xsd:attribute name="structureName" type="xsd:string"/>
<xsd:attribute name="structureName" type="xsd:string"/> <!-- a string name for the structure, especially if we don't have its ID -->
<xsd:attribute name="frequency" type="xsd:int"/>
<xsd:attribute name="logDice" type="xsd:decimal"/>
<xsd:attribute name="audio" type="xsd:string"/>
<xsd:attribute name="cluster" type="xsd:int"/>
<xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
<xsd:attribute name="cluster" type="xsd:int"/> <!-- number identifying a group of clustered examples -->
<!-- Wrapper including a translation and related data -->
<xsd:complexType name="translationContainerType">
<xsd:element name="labelList" type="labelListType" minOccurs="0"/>
<xsd:element name="translation" type="translationType"/>
<xsd:element name="explanation" type="xsd:string" minOccurs="0"/>
<xsd:element name="explanation" type="xsd:string" minOccurs="0"/> <!-- hmm: what's expected here? -->
<xsd:attribute name="cluster" type="xsd:int" use="optional"/>
<xsd:attribute name="source" type="xsd:string" use="optional"/>
<xsd:attribute name="cluster" type="xsd:int"/> <!-- number identifying a group of clustered translations -->
<xsd:attribute name="source" type="xsd:string"/> <!-- hmm, we already have @source under translationType; isn't that enough? -->
<!-- List of translation wrappers -->
<xsd:complexType name="translationContainerListType">
<xsd:element name="translationContainer" type="translationContainerType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Wrapper including an example and (possibly) its translation -->
<xsd:complexType name="exampleContainerType">
<xsd:choice> <!-- can be a corpus example or a multi-word example -->
<xsd:element name="corpusExample" type="corpusExampleType"/>
<xsd:element name="multiwordExample" type="multipleLexemeExampleType"/>
@ -290,12 +320,14 @@
<!-- List of example containers -->
<xsd:complexType name="exampleContainerListType">
<xsd:element name="exampleContainer" type="exampleContainerType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Set of semantic role labels -->
<xsd:simpleType name="semanticRoleType">
<xsd:restriction base="xsd:string">
<xsd:enumeration value="ACT"/>
@ -326,58 +358,66 @@
<!-- Container which associates a set of statistics with a certain type of data -->
<xsd:complexType name="statisticsContainerType">
<xsd:choice> <!-- currently only semantic roles are supported, but expect other types of data in future -->
<xsd:element name="semanticRole" type="semanticRoleType"/>
<xsd:element name="measureList" type="measureListType"/>
<!-- List of statistics containers -->
<xsd:complexType name="statisticsContainerListType">
<xsd:element name="statisticsContainer" type="statisticsContainerType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Syntactic structure, with its components or examples -->
<xsd:complexType name="syntacticStructureType">
<xsd:choice> <!-- Can either list the structure's components, or examples of the structure (maybe this should be divided into 2 types) -->
<xsd:element name="component" type="componentType" minOccurs="0" maxOccurs="unbounded"/>
<xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0" maxOccurs="unbounded"/>
<xsd:attribute name="id" type="xsd:int"/>
<xsd:attribute name="name" type="xsd:string"/>
<xsd:attribute name="pp" type="xsd:string"/>
<xsd:attribute name="name" type="xsd:string"/> <!-- a string name for the structure, especially if we don't have its ID -->
<xsd:attribute name="pp" type="xsd:string"/> <!-- the structure's preposition (if any) -->
<!-- List of syntactic structures -->
<xsd:complexType name="syntacticStructureListType">
<xsd:element name="syntacticStructure" type="syntacticStructureType" minOccurs="0" maxOccurs="unbounded"/>
<xsd:attribute name="system" type="xsd:string"/>
<xsd:attribute name="system" type="xsd:string"/> <!-- probably JOS or UD -->
<!-- Container associating a semantic role with a list of syntactic structures -->
<xsd:complexType name="semanticRoleContainerType">
<xsd:element name="semanticRole" type="semanticRoleType"/>
<xsd:element name="syntacticStructureList" type="syntacticStructureListType"/>
<xsd:attribute name="cluster" type="xsd:int" use="optional"/>
<xsd:attribute name="cluster" type="xsd:int"/> <!-- hmm, number identifying a group of clustered roles? -->
<!-- List of semantic role containers -->
<xsd:complexType name="semanticRoleContainerListType">
<xsd:element name="semanticRoleContainer" type="semanticRoleContainerType" minOccurs="0" maxOccurs="unbounded"/>
<!-- Human-readable string identifying a valency pattern -->
<xsd:simpleType name="patternRepresentationType">
<xsd:restriction base="xsd:string"/>
<!-- Wrapper for valency pattern data -->
<xsd:complexType name="valencyPatternType">
<xsd:element name="measureList" type="measureListType"/>
@ -388,6 +428,7 @@
<xsd:attribute name="id" type="xsd:int"/>
<!-- List of valency patterns -->
<xsd:complexType name="valencyPatternListType">
<xsd:element name="valencyPattern" type="valencyPatternType" minOccurs="0" maxOccurs="unbounded"/>

View File

@ -1,7 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Rudimentary top-down schema intended for all CJVT dictionary-style datasets. Each such dataset should be
validated both with this schema and with an appropriate more specific schema based on the inventory. -->
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:import namespace=""
schemaLocation="" />
<xsd:import namespace="" schemaLocation="" />
<!-- top-level element is a dictionary -->
<xsd:element name="dictionary" type="dictionaryType"/>