IssueID #937: Added basic comments for the inventory types

This commit is contained in:
Cyprian Laskowski 2020-03-04 21:27:33 +01:00
parent 660664a74d
commit ce2ffa9f71
2 changed files with 100 additions and 57 deletions

View File

@ -1,17 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace"
schemaLocation="http://www.w3.org/2001/xml.xsd" />
<!-- This inventory defines types which are intended as building blocks that can and should be used in all CJVT schemas. -->
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/xml.xsd" />
<!-- Status string identifying stage in the lexicographic process -->
<xsd:simpleType name="statusType"> <xsd:simpleType name="statusType">
<xsd:restriction base="xsd:string"/> <xsd:restriction base="xsd:string"/>
</xsd:simpleType> </xsd:simpleType>
<!-- Basic form and info of a lexical unit -->
<xsd:complexType name="lemmaType"> <xsd:complexType name="lemmaType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
<xsd:attribute name="audio" type="xsd:string"/> <xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
<xsd:attribute name="type"> <xsd:attribute name="type"> <!-- one of a limited number of possible types, default interpretation is normally "single" -->
<xsd:simpleType> <xsd:simpleType>
<xsd:restriction base="xsd:string"> <xsd:restriction base="xsd:string">
<xsd:enumeration value="single"/> <xsd:enumeration value="single"/>
@ -24,16 +27,19 @@
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- Free form comment -->
<xsd:simpleType name="commentType"> <xsd:simpleType name="commentType">
<xsd:restriction base="xsd:string"/> <xsd:restriction base="xsd:string"/>
</xsd:simpleType> </xsd:simpleType>
<!-- Used when two lexical units with the same lemma need to be disambiguated -->
<xsd:complexType name="homonymyType"> <xsd:complexType name="homonymyType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="homonymyFeature" type="homonymyFeatureType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="homonymyFeature" type="homonymyFeatureType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Basic grammatical feature as a name-value pair -->
<xsd:complexType name="grammarFeatureType"> <xsd:complexType name="grammarFeatureType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
@ -42,6 +48,7 @@
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- Lemma-disambiguating feature, more open-ended than grammarFeatureType -->
<xsd:complexType name="homonymyFeatureType"> <xsd:complexType name="homonymyFeatureType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
@ -50,6 +57,7 @@
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- Element which uniquely identifies a headword, with its basic properties and (if necessary) disambiguating features -->
<xsd:complexType name="headwordType"> <xsd:complexType name="headwordType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="lemma" type="lemmaType"/> <xsd:element name="lemma" type="lemmaType"/>
@ -57,34 +65,36 @@
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Word component of a lexical unit -->
<xsd:complexType name="lexemeType"> <xsd:complexType name="lexemeType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
<xsd:attribute name="type" type="xsd:string"/> <xsd:attribute name="type" type="xsd:string"/> <!-- hmm: should probably restrict possible values? currently seems to be "compound" or empty -->
<xsd:attribute name="lexical_unit_lexeme_id" type="xsd:int"/> <xsd:attribute name="lexical_unit_lexeme_id" type="xsd:int"/> <!-- ID within its lexical unit -->
<xsd:attribute name="sloleks" type="xsd:string"/> <xsd:attribute name="sloleks" type="xsd:string"/> <!-- Sloleks ID -->
<xsd:attribute name="kol" type="xsd:string"/> <xsd:attribute name="kol" type="xsd:string"/> <!-- hmm, legacy attribute for a collocate's lemma; why not just use @lemma? -->
<xsd:attribute name="lemma" type="xsd:string"/> <xsd:attribute name="lemma" type="xsd:string"/>
<xsd:attribute name="msd" type="xsd:string"/> <xsd:attribute name="msd" type="xsd:string"/> <!-- msd within the Slovene JOS system -->
</xsd:extension> </xsd:extension>
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- The lexical unit of the headword -->
<xsd:complexType name="lexicalUnitType"> <xsd:complexType name="lexicalUnitType">
<xsd:sequence> <xsd:sequence>
<xsd:choice> <xsd:choice> <!-- the headword can be either a single lexeme or multiple components -->
<xsd:element name="lexeme" type="lexemeType"/> <xsd:element name="lexeme" type="lexemeType"/>
<xsd:element name="component" type="componentType" minOccurs="2" maxOccurs="unbounded"/> <xsd:element name="component" type="componentType" minOccurs="2" maxOccurs="unbounded"/>
</xsd:choice> </xsd:choice>
</xsd:sequence> </xsd:sequence>
<xsd:attribute name="id" type="xsd:int" use="required"/> <xsd:attribute name="id" type="xsd:int" use="required"/>
<xsd:attribute name="type" type="xsd:string" use="required"/> <xsd:attribute name="type" type="xsd:string" use="required"/> <!-- should probably restrict possible values in line with superbaza lexical_unit_type -->
<xsd:attribute name="structure_id" type="xsd:int"/> <xsd:attribute name="structure_id" type="xsd:int"/> <!-- syntactic structure ID -->
<xsd:attribute name="origin" type="xsd:string"/> <xsd:attribute name="origin" type="xsd:string"/> <!-- legacy attribute to track where the lexical unit came from -->
</xsd:complexType> </xsd:complexType>
<!-- Slovenian morphosyntactic categories (using the JOS system) -->
<xsd:simpleType name="categoryType"> <xsd:simpleType name="categoryType">
<!-- The closed set of expected categories. -->
<xsd:restriction base="xsd:string"> <xsd:restriction base="xsd:string">
<xsd:enumeration value="samostalnik"/> <xsd:enumeration value="samostalnik"/>
<xsd:enumeration value="glagol"/> <xsd:enumeration value="glagol"/>
@ -101,28 +111,32 @@
</xsd:restriction> </xsd:restriction>
</xsd:simpleType> </xsd:simpleType>
<!-- Grammatical information -->
<xsd:complexType name="grammarType"> <xsd:complexType name="grammarType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="category" type="categoryType" minOccurs="0"/> <xsd:element name="category" type="categoryType" minOccurs="0"/> <!-- identifies the part of speech, unless it's not a single lexeme -->
<xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="grammarFeature" type="grammarFeatureType" minOccurs="0" maxOccurs="unbounded"/> <!-- relevant grammatical features -->
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Statistical measure -->
<xsd:complexType name="measureType"> <xsd:complexType name="measureType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:decimal"> <xsd:extension base="xsd:decimal">
<xsd:attribute name="type" type="xsd:string" use="required"/> <xsd:attribute name="type" type="xsd:string" use="required"/> <!-- type of measure (e.g., frequency, logDice) -->
<xsd:attribute name="source" type="xsd:string"/> <xsd:attribute name="source" type="xsd:string"/> <!-- context of the measure, probably corpus name and version -->
</xsd:extension> </xsd:extension>
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- List of measures -->
<xsd:complexType name="measureListType"> <xsd:complexType name="measureListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="measure" type="measureType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="measure" type="measureType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Variant of a lexical unit -->
<xsd:complexType name="variantType"> <xsd:complexType name="variantType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
@ -131,123 +145,136 @@
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- List of variants -->
<xsd:complexType name="variantListType"> <xsd:complexType name="variantListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="variant" type="variantType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="variant" type="variantType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Related lexical unit -->
<xsd:complexType name="relatedEntryType"> <xsd:complexType name="relatedEntryType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
<xsd:attribute name="lexical_unit_id" type="xsd:int"/> <xsd:attribute name="lexical_unit_id" type="xsd:int"/>
<xsd:attribute name="origin" type="xsd:string"/> <xsd:attribute name="origin" type="xsd:string"/> <!-- legacy attribute to track where the lexical unit came from -->
</xsd:extension> </xsd:extension>
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- List of related entries -->
<xsd:complexType name="relatedEntryListType"> <xsd:complexType name="relatedEntryListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="relatedEntry" type="relatedEntryType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="relatedEntry" type="relatedEntryType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Name-value pair for providing extra information -->
<xsd:complexType name="labelType"> <xsd:complexType name="labelType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
<xsd:attribute name="feature_id" type="xsd:int"/> <xsd:attribute name="feature_id" type="xsd:int"/>
<xsd:attribute name="featureId" type="xsd:string"/> <xsd:attribute name="featureId" type="xsd:string"/> <!-- an ID from an external source of the label feature -->
<xsd:attribute name="type" type="xsd:string"/> <xsd:attribute name="type" type="xsd:string"/> <!-- name of the label feature corresponding to its ID -->
</xsd:extension> </xsd:extension>
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- List of labels -->
<xsd:complexType name="labelListType"> <xsd:complexType name="labelListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="label" type="labelType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="label" type="labelType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Definition of a sense -->
<xsd:complexType name="definitionType"> <xsd:complexType name="definitionType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
<xsd:attribute name="type" type="xsd:string"/> <xsd:attribute name="type" type="xsd:string"/> <!-- hmm: what's expected here? -->
<xsd:attribute name="definition_id" type="xsd:int"/> <xsd:attribute name="definition_id" type="xsd:int"/>
</xsd:extension> </xsd:extension>
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- List of definitions -->
<xsd:complexType name="definitionListType"> <xsd:complexType name="definitionListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="definition" type="definitionType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="definition" type="definitionType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Translation from Slovene into another language -->
<xsd:complexType name="translationType"> <xsd:complexType name="translationType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
<xsd:attribute name="targetLang" type="xsd:string" use="required"/> <xsd:attribute name="targetLang" type="xsd:string" use="required"/> <!-- target language -->
<xsd:attribute name="source" type="xsd:string"/> <xsd:attribute name="source" type="xsd:string"/> <!-- where the translation comes from -->
<xsd:attribute name="audio" type="xsd:string"/> <xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
</xsd:extension> </xsd:extension>
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- Component of a syntactic structure, containing one of the lexemes of a lexical unit; used in non-mixed content contexts -->
<xsd:complexType name="componentType"> <xsd:complexType name="componentType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="lexeme" type="lexemeType"/> <xsd:element name="lexeme" type="lexemeType"/>
</xsd:sequence> </xsd:sequence>
<xsd:attribute name="structure_id" type="xsd:int"/> <xsd:attribute name="structure_id" type="xsd:int"/>
<xsd:attribute name="num" type="xsd:int"/> <xsd:attribute name="num" type="xsd:int"/> <!-- index identifying the component's position within the syntactic structure -->
</xsd:complexType> </xsd:complexType>
<!-- Component in a syntactic structure and lexeme in a lexical unit; used in mixed content contexts -->
<xsd:complexType name="compType"> <xsd:complexType name="compType">
<xsd:simpleContent> <xsd:simpleContent>
<xsd:extension base="xsd:string"> <xsd:extension base="xsd:string">
<xsd:attribute name="structure_id" type="xsd:int"/> <xsd:attribute name="structure_id" type="xsd:int"/>
<xsd:attribute name="num" type="xsd:int"/> <xsd:attribute name="num" type="xsd:int"/> <!-- index identifying the component's position within the syntactic structure -->
<xsd:attribute name="role"> <xsd:attribute name="role"> <!-- within collocations, identifies the word's role -->
<xsd:simpleType> <xsd:simpleType>
<xsd:restriction base="xsd:string"> <xsd:restriction base="xsd:string">
<xsd:enumeration value="headword"/> <xsd:enumeration value="headword"/>
<xsd:enumeration value="collocate"/> <xsd:enumeration value="collocate"/>
<xsd:enumeration value="other"/> <xsd:enumeration value="other"/> <!-- used for prepositions and other extra components -->
</xsd:restriction> </xsd:restriction>
</xsd:simpleType> </xsd:simpleType>
</xsd:attribute> </xsd:attribute>
<xsd:attribute name="sloleks" type="xsd:string"/> <xsd:attribute name="sloleks" type="xsd:string"/> <!-- Sloleks ID -->
<xsd:attribute name="kol" type="xsd:string"/> <xsd:attribute name="kol" type="xsd:string"/> <!-- legacy attribute for the word's lemma -->
</xsd:extension> </xsd:extension>
</xsd:simpleContent> </xsd:simpleContent>
</xsd:complexType> </xsd:complexType>
<!-- Mixed-content container element used in examples where roles can include multiple word components (e.g., semantic roles). -->
<xsd:complexType name="exampleTreeType" mixed="true"> <xsd:complexType name="exampleTreeType" mixed="true">
<xsd:sequence> <xsd:sequence>
<xsd:element name="comp" type="compType" maxOccurs="unbounded"/> <xsd:element name="comp" type="compType" maxOccurs="unbounded"/> <!-- the tree contains one or more components -->
</xsd:sequence> </xsd:sequence>
<xsd:attribute name="role" type="xsd:string"/> <xsd:attribute name="role" type="xsd:string"/> <!-- should probably be a valid value from semanticRoleType -->
</xsd:complexType> </xsd:complexType>
<!-- Example sentence from a corpus (as opposed to a multi-word example) -->
<xsd:complexType name="corpusExampleType" mixed="true"> <xsd:complexType name="corpusExampleType" mixed="true">
<xsd:sequence> <xsd:sequence>
<xsd:choice minOccurs="0" maxOccurs="unbounded"> <xsd:choice minOccurs="0" maxOccurs="unbounded"> <!-- in most contexts, comps are sufficient, but in some trees are needed -->
<xsd:element name="comp" type="compType"/> <xsd:element name="comp" type="compType"/>
<xsd:element name="tree" type="exampleTreeType"/> <xsd:element name="tree" type="exampleTreeType"/>
</xsd:choice> </xsd:choice>
</xsd:sequence> </xsd:sequence>
<xsd:attribute name="corpus_id" type="xsd:int"/> <xsd:attribute name="corpus_id" type="xsd:int"/>
<xsd:attribute name="example_id" type="xsd:int"/> <xsd:attribute name="example_id" type="xsd:int"/>
<xsd:attribute name="exampleId" type="xsd:string"/> <xsd:attribute name="exampleId" type="xsd:string"/> <!-- the example's ID within the corpus itself -->
<xsd:attribute name="modified" type="xsd:boolean"/> <xsd:attribute name="modified" type="xsd:boolean"/> <!-- does the example text differ from the text in the corpus? -->
<xsd:attribute name="lexical_unit_id" type="xsd:int"/> <xsd:attribute name="lexical_unit_id" type="xsd:int"/>
<xsd:attribute name="audio" type="xsd:string"/> <xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
</xsd:complexType> </xsd:complexType>
<!-- Multi-word example (as opposed to a corpus sentence) -->
<xsd:complexType name="multipleLexemeExampleType"> <xsd:complexType name="multipleLexemeExampleType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="comp" type="compType" maxOccurs="unbounded"/> <xsd:element name="comp" type="compType" maxOccurs="unbounded"/> <!-- the example consists of a sequence of components -->
</xsd:sequence> </xsd:sequence>
<xsd:attribute name="type" use="required"> <xsd:attribute name="type" use="required"> <!-- one of a pre-defined set of types (which will probably still be expanded) -->
<xsd:simpleType> <xsd:simpleType>
<xsd:restriction base="xsd:string"> <xsd:restriction base="xsd:string">
<xsd:enumeration value="collocation"/> <xsd:enumeration value="collocation"/>
@ -257,32 +284,35 @@
</xsd:attribute> </xsd:attribute>
<xsd:attribute name="lexical_unit_id" type="xsd:int"/> <xsd:attribute name="lexical_unit_id" type="xsd:int"/>
<xsd:attribute name="structure_id" type="xsd:int"/> <xsd:attribute name="structure_id" type="xsd:int"/>
<xsd:attribute name="structureName" type="xsd:string"/> <xsd:attribute name="structureName" type="xsd:string"/> <!-- a string name for the structure, especially if we don't have its ID -->
<xsd:attribute name="frequency" type="xsd:int"/> <xsd:attribute name="frequency" type="xsd:int"/>
<xsd:attribute name="logDice" type="xsd:decimal"/> <xsd:attribute name="logDice" type="xsd:decimal"/>
<xsd:attribute name="audio" type="xsd:string"/> <xsd:attribute name="audio" type="xsd:string"/> <!-- legacy ID associated with an audio file -->
<xsd:attribute name="cluster" type="xsd:int"/> <xsd:attribute name="cluster" type="xsd:int"/> <!-- number identifying a group of clustered examples -->
</xsd:complexType> </xsd:complexType>
<!-- Wrapper including a translation and related data -->
<xsd:complexType name="translationContainerType"> <xsd:complexType name="translationContainerType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="labelList" type="labelListType" minOccurs="0"/> <xsd:element name="labelList" type="labelListType" minOccurs="0"/>
<xsd:element name="translation" type="translationType"/> <xsd:element name="translation" type="translationType"/>
<xsd:element name="explanation" type="xsd:string" minOccurs="0"/> <xsd:element name="explanation" type="xsd:string" minOccurs="0"/> <!-- hmm: what's expected here? -->
</xsd:sequence> </xsd:sequence>
<xsd:attribute name="cluster" type="xsd:int" use="optional"/> <xsd:attribute name="cluster" type="xsd:int"/> <!-- number identifying a group of clustered translations -->
<xsd:attribute name="source" type="xsd:string" use="optional"/> <xsd:attribute name="source" type="xsd:string"/> <!-- hmm, we already have @source under translationType, that's not enough? -->
</xsd:complexType> </xsd:complexType>
<!-- List of translation wrappers -->
<xsd:complexType name="translationContainerListType"> <xsd:complexType name="translationContainerListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="translationContainer" type="translationContainerType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="translationContainer" type="translationContainerType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Wrapper including an example and (possibly) its translation -->
<xsd:complexType name="exampleContainerType"> <xsd:complexType name="exampleContainerType">
<xsd:sequence> <xsd:sequence>
<xsd:choice> <xsd:choice> <!-- can be a corpus example or a multi-word example -->
<xsd:element name="corpusExample" type="corpusExampleType"/> <xsd:element name="corpusExample" type="corpusExampleType"/>
<xsd:element name="multiwordExample" type="multipleLexemeExampleType"/> <xsd:element name="multiwordExample" type="multipleLexemeExampleType"/>
</xsd:choice> </xsd:choice>
@ -290,12 +320,14 @@
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- List of example containers -->
<xsd:complexType name="exampleContainerListType"> <xsd:complexType name="exampleContainerListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="exampleContainer" type="exampleContainerType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="exampleContainer" type="exampleContainerType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Set of semantic role labels -->
<xsd:simpleType name="semanticRoleType"> <xsd:simpleType name="semanticRoleType">
<xsd:restriction base="xsd:string"> <xsd:restriction base="xsd:string">
<xsd:enumeration value="ACT"/> <xsd:enumeration value="ACT"/>
@ -326,58 +358,66 @@
</xsd:restriction> </xsd:restriction>
</xsd:simpleType> </xsd:simpleType>
<!-- Container which associates a set of statistics with a certain type of data -->
<xsd:complexType name="statisticsContainerType"> <xsd:complexType name="statisticsContainerType">
<xsd:sequence> <xsd:sequence>
<xsd:choice> <xsd:choice> <!-- currently only semantic roles are supported, but expect other types of data in future -->
<xsd:element name="semanticRole" type="semanticRoleType"/> <xsd:element name="semanticRole" type="semanticRoleType"/>
</xsd:choice> </xsd:choice>
<xsd:element name="measureList" type="measureListType"/> <xsd:element name="measureList" type="measureListType"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- List of statistics containers -->
<xsd:complexType name="statisticsContainerListType"> <xsd:complexType name="statisticsContainerListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="statisticsContainer" type="statisticsContainerType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="statisticsContainer" type="statisticsContainerType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Syntactic structure, with its components or examples -->
<xsd:complexType name="syntacticStructureType"> <xsd:complexType name="syntacticStructureType">
<xsd:sequence> <xsd:sequence>
<xsd:choice> <xsd:choice> <!-- Can either list the structure's components, or examples of the structure (maybe this should be divided into 2 types) -->
<xsd:element name="component" type="componentType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="component" type="componentType" minOccurs="0" maxOccurs="unbounded"/>
<xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="exampleContainerList" type="exampleContainerListType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:choice> </xsd:choice>
</xsd:sequence> </xsd:sequence>
<xsd:attribute name="id" type="xsd:int"/> <xsd:attribute name="id" type="xsd:int"/>
<xsd:attribute name="name" type="xsd:string"/> <xsd:attribute name="name" type="xsd:string"/> <!-- a string name for the structure, especially if we don't have its ID -->
<xsd:attribute name="pp" type="xsd:string"/> <xsd:attribute name="pp" type="xsd:string"/> <!-- the structure's preposition (if any) -->
</xsd:complexType> </xsd:complexType>
<!-- List of syntactic structures -->
<xsd:complexType name="syntacticStructureListType"> <xsd:complexType name="syntacticStructureListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="syntacticStructure" type="syntacticStructureType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="syntacticStructure" type="syntacticStructureType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
<xsd:attribute name="system" type="xsd:string"/> <xsd:attribute name="system" type="xsd:string"/> <!-- probably JOS or UD -->
</xsd:complexType> </xsd:complexType>
<!-- Container associating a semantic role with a list of syntactic structures -->
<xsd:complexType name="semanticRoleContainerType"> <xsd:complexType name="semanticRoleContainerType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="semanticRole" type="semanticRoleType"/> <xsd:element name="semanticRole" type="semanticRoleType"/>
<xsd:element name="syntacticStructureList" type="syntacticStructureListType"/> <xsd:element name="syntacticStructureList" type="syntacticStructureListType"/>
</xsd:sequence> </xsd:sequence>
<xsd:attribute name="cluster" type="xsd:int" use="optional"/> <xsd:attribute name="cluster" type="xsd:int"/> <!-- hmm, number identifying a group of clustered roles? -->
</xsd:complexType> </xsd:complexType>
<!-- List of semantic role containers -->
<xsd:complexType name="semanticRoleContainerListType"> <xsd:complexType name="semanticRoleContainerListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="semanticRoleContainer" type="semanticRoleContainerType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="semanticRoleContainer" type="semanticRoleContainerType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence> </xsd:sequence>
</xsd:complexType> </xsd:complexType>
<!-- Human-readable string identifying a valency pattern -->
<xsd:simpleType name="patternRepresentationType"> <xsd:simpleType name="patternRepresentationType">
<xsd:restriction base="xsd:string"/> <xsd:restriction base="xsd:string"/>
</xsd:simpleType> </xsd:simpleType>
<!-- Wrapper for valency pattern data -->
<xsd:complexType name="valencyPatternType"> <xsd:complexType name="valencyPatternType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="measureList" type="measureListType"/> <xsd:element name="measureList" type="measureListType"/>
@ -388,6 +428,7 @@
<xsd:attribute name="id" type="xsd:int"/> <xsd:attribute name="id" type="xsd:int"/>
</xsd:complexType> </xsd:complexType>
<!-- List of valency patterns -->
<xsd:complexType name="valencyPatternListType"> <xsd:complexType name="valencyPatternListType">
<xsd:sequence> <xsd:sequence>
<xsd:element name="valencyPattern" type="valencyPatternType" minOccurs="0" maxOccurs="unbounded"/> <xsd:element name="valencyPattern" type="valencyPatternType" minOccurs="0" maxOccurs="unbounded"/>

View File

@ -1,7 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<!-- Rudimentary top-down schema intended for all CJVT dictionary-style datasets. Each such dataset should be
validated both with this schema and with an appropriate more specific schema based on the inventory. -->
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" <xsd:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/xml.xsd" />
schemaLocation="http://www.w3.org/2001/xml.xsd" />
<!-- top-level element is a dictionary --> <!-- top-level element is a dictionary -->
<xsd:element name="dictionary" type="dictionaryType"/> <xsd:element name="dictionary" type="dictionaryType"/>