{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Universal Wiktionary Dictionary Entry",
  "description": "Language-agnostic schema for dictionary entries from any Wiktionary edition",
  "type": "object",
  "required": ["word", "pos", "senses"],
  "properties": {
    "word": {
      "type": "string",
      "description": "The headword being defined"
    },
    "pos": {
      "type": "string",
      "description": "Part of speech (noun, verb, adj, adv, etc.)",
      "examples": ["noun", "verb", "adj", "adv", "prep", "conj", "intj", "pron"]
    },
    "senses": {
      "type": "array",
      "description": "Word meanings and usage",
      "items": {
        "type": "object",
        "properties": {
          "glosses": {
            "type": "array",
            "items": { "type": "string" },
            "description": "Definition text(s)"
          },
          "examples": {
            "type": "array",
            "items": { "type": "string" },
            "description": "Usage examples"
          },
          "raw_glosses": {
            "type": "array",
            "items": { "type": "string" },
            "description": "Unprocessed glosses with markup"
          },
          "tags": {
            "type": "array",
            "items": { "type": "string" },
            "description": "Sense-specific tags (figurative, colloquial, etc.)"
          }
        }
      }
    },
    "phonetics": {
      "type": "object",
      "description": "Pronunciation and sound information",
      "properties": {
        "ipa": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Clean IPA transcription(s) without special characters"
        },
        "ipa_variations": {
          "type": "array",
          "description": "Detailed IPA variations with regional information",
          "items": {
            "type": "object",
            "properties": {
              "ipa": {
                "type": "string",
                "description": "Clean IPA transcription"
              },
              "raw_tags": {
                "type": "array",
                "items": { "type": "string" },
                "description": "Regional information (countries, regions, cities)"
              }
            },
            "required": ["ipa"]
          }
        },
        "homophones": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Words pronounced the same way"
        }
      }
    },
    "hyphenation": {
      "type": "array",
      "items": { "type": "string" },
      "description": "Syllable breaks (e.g., ['Wör', 'ter', 'buch'])"
    },
    "forms": {
      "description": "Inflected forms. Can be a flat list (universal default for nouns, adj, etc.), a single compressed object (for verbs), or an array of compressed objects (for verbs with multiple usages like reflexive/transitive).",
      "anyOf": [
        {
          "type": "array",
          "description": "Default: A flat, uncompressed list of all inflected forms.",
          "items": {
            "type": "object",
            "properties": {
              "form": { "type": "string" },
              "tags": {
                "type": "array",
                "items": { "type": "string" }
              },
              "source": { "type": "string" }
            }
          }
        },
        {
          "type": "object",
          "description": "Compressed: A type-tagged, language-specific set of principal parts.",
          "properties": {
            "type": {
              "type": "string",
              "description": "Identifier for the compression rules (e.g., 'de_verb', 'fr_noun')."
            },
            "data": {
              "type": "object",
              "description": "The compressed principal parts.",
              "additionalProperties": true
            }
          },
          "required": ["type", "data"]
        },
        {
          "type": "array",
          "description": "Multiple compressed forms (e.g., for verbs that can be both reflexive and transitive).",
          "items": {
            "type": "object",
            "properties": {
              "type": {
                "type": "string",
                "description": "Identifier for the compression rules (e.g., 'de_verb')."
              },
              "data": {
                "type": "object",
                "description": "The compressed principal parts.",
                "additionalProperties": true
              }
            },
            "required": ["type", "data"]
          }
        }
      ]
    },
    "grammatical_features": {
      "type": "object",
      "description": "Gender, number, case, tense, etc.",
      "properties": {
        "gender": {
          "type": "string",
          "enum": ["masculine", "feminine", "neuter", "common"]
        },
        "number": {
          "type": "string",
          "enum": ["singular", "plural", "dual"]
        },
        "tags": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Other grammatical tags"
        }
      }
    },
    "etymology": {
      "type": "object",
      "description": "Word origin and historical development",
      "properties": {
        "text": { "type": "string" },
        "texts": {
          "type": "array",
          "items": { "type": "string" }
        },
        "number": { "type": "integer" }
      }
    },
    "relations": {
      "type": "object",
      "description": "Semantic and lexical relationships",
      "properties": {
        "synonyms": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "word": { "type": "string" },
              "sense": { "type": "string" }
            }
          }
        },
        "antonyms": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "word": { "type": "string" },
              "sense": { "type": "string" }
            }
          }
        },
        "hypernyms": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Broader/parent terms"
        },
        "hyponyms": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Narrower/child terms"
        },
        "meronyms": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Part-of relationships"
        },
        "holonyms": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Whole-of relationships"
        },
        "related": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Related terms (see also)"
        },
        "derived": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Derived/compound terms"
        },
        "coordinate_terms": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Co-hyponyms (sister terms)"
        }
      }
    },
    "translations": {
      "type": "array",
      "description": "Translations to other languages",
      "items": {
        "type": "object",
        "properties": {
          "lang_code": { "type": "string" },
          "word": { "type": "string" },
          "sense_index": { "type": "string" },
          "tags": {
            "type": "array",
            "items": { "type": "string" }
          }
        }
      }
    },
    "descendants": {
      "type": "array",
      "description": "Words in other languages derived from this word",
      "items": {
        "type": "object",
        "properties": {
          "lang_code": { "type": "string" },
          "lang": { "type": "string" },
          "word": { "type": "string" },
          "tags": {
            "type": "array",
            "items": { "type": "string" }
          }
        }
      }
    }
  }
}