363 lines
8.9 KiB
JSON
363 lines
8.9 KiB
JSON
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Universal Wiktionary Dictionary Entry",
  "description": "Language-agnostic schema for dictionary entries from any Wiktionary edition",
  "type": "object",
  "required": [
    "word",
    "pos",
    "senses"
  ],
  "definitions": {
    "compressed_form": {
      "type": "object",
      "description": "Compressed: A type-tagged, language-specific set of principal parts.",
      "properties": {
        "type": {
          "type": "string",
          "description": "Identifier for the compression rules (e.g., 'de_verb', 'fr_noun')."
        },
        "data": {
          "type": "object",
          "description": "The compressed principal parts.",
          "additionalProperties": true
        }
      },
      "required": [
        "type",
        "data"
      ]
    }
  },
  "properties": {
    "word": {
      "type": "string",
      "description": "The headword being defined"
    },
    "pos": {
      "type": "string",
      "description": "Part of speech (noun, verb, adj, adv, etc.)",
      "examples": [
        "noun",
        "verb",
        "adj",
        "adv",
        "prep",
        "conj",
        "intj",
        "pron"
      ]
    },
    "senses": {
      "type": "array",
      "description": "Word meanings and usage",
      "items": {
        "type": "object",
        "properties": {
          "glosses": {
            "type": "array",
            "items": {
              "type": "string"
            },
            "description": "Definition text(s)"
          },
          "examples": {
            "type": "array",
            "items": {
              "type": "string"
            },
            "description": "Usage examples"
          },
          "raw_glosses": {
            "type": "array",
            "items": {
              "type": "string"
            },
            "description": "Unprocessed glosses with markup"
          },
          "tags": {
            "type": "array",
            "items": {
              "type": "string"
            },
            "description": "Sense-specific tags (figurative, colloquial, etc.)"
          }
        }
      }
    },
    "phonetics": {
      "type": "object",
      "description": "Pronunciation and sound information",
      "properties": {
        "ipa": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Clean IPA transcription(s) without special characters"
        },
        "ipa_variations": {
          "type": "array",
          "description": "Detailed IPA variations with regional information",
          "items": {
            "type": "object",
            "properties": {
              "ipa": {
                "type": "string",
                "description": "Clean IPA transcription"
              },
              "raw_tags": {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "description": "Regional information (countries, regions, cities)"
              }
            },
            "required": [
              "ipa"
            ]
          }
        },
        "homophones": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Words pronounced the same way"
        }
      }
    },
    "hyphenation": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Syllable breaks (e.g., ['Wör', 'ter', 'buch'])"
    },
    "forms": {
      "description": "Inflected forms. Can be a flat list (universal default for nouns, adj, etc.), a single compressed object (for verbs), or an array of compressed objects (for verbs with multiple usages like reflexive/transitive).",
      "anyOf": [
        {
          "type": "array",
          "description": "Default: A flat, uncompressed list of all inflected forms.",
          "items": {
            "type": "object",
            "properties": {
              "form": {
                "type": "string"
              },
              "tags": {
                "type": "array",
                "items": {
                  "type": "string"
                }
              },
              "source": {
                "type": "string"
              }
            }
          }
        },
        {
          "$ref": "#/definitions/compressed_form"
        },
        {
          "type": "array",
          "description": "Multiple compressed forms (e.g., for verbs that can be both reflexive and transitive).",
          "items": {
            "$ref": "#/definitions/compressed_form"
          }
        }
      ]
    },
    "grammatical_features": {
      "type": "object",
      "description": "Gender, number, case, tense, etc.",
      "properties": {
        "gender": {
          "type": "string",
          "enum": [
            "masculine",
            "feminine",
            "neuter",
            "common"
          ]
        },
        "number": {
          "type": "string",
          "enum": [
            "singular",
            "plural",
            "dual"
          ]
        },
        "tags": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Other grammatical tags"
        }
      }
    },
    "etymology": {
      "type": "object",
      "description": "Word origin and historical development",
      "properties": {
        "text": {
          "type": "string"
        },
        "texts": {
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "number": {
          "type": "integer"
        }
      }
    },
    "relations": {
      "type": "object",
      "description": "Semantic and lexical relationships",
      "properties": {
        "synonyms": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "word": {
                "type": "string"
              },
              "sense": {
                "type": "string"
              }
            }
          }
        },
        "antonyms": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "word": {
                "type": "string"
              },
              "sense": {
                "type": "string"
              }
            }
          }
        },
        "hypernyms": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Broader/parent terms"
        },
        "hyponyms": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Narrower/child terms"
        },
        "meronyms": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Part-of relationships"
        },
        "holonyms": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Whole-of relationships"
        },
        "related": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Related terms (see also)"
        },
        "derived": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Derived/compound terms"
        },
        "coordinate_terms": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Co-hyponyms (sister terms)"
        }
      }
    },
    "translations": {
      "type": "array",
      "description": "Translations to other languages",
      "items": {
        "type": "object",
        "properties": {
          "lang_code": {
            "type": "string"
          },
          "word": {
            "type": "string"
          },
          "sense_index": {
            "type": "string"
          },
          "tags": {
            "type": "array",
            "items": {
              "type": "string"
            }
          }
        }
      }
    },
    "descendants": {
      "type": "array",
      "description": "Words in other languages derived from this word",
      "items": {
        "type": "object",
        "properties": {
          "lang_code": {
            "type": "string"
          },
          "lang": {
            "type": "string"
          },
          "word": {
            "type": "string"
          },
          "tags": {
            "type": "array",
            "items": {
              "type": "string"
            }
          }
        }
      }
    }
  }
}