Migrate to gitea

This commit is contained in:
jonasgaudian
2026-02-13 00:10:40 +01:00
commit 6d06a9e14e
38 changed files with 31427 additions and 0 deletions

View File

@@ -0,0 +1,362 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Universal Wiktionary Dictionary Entry",
"description": "Language-agnostic schema for dictionary entries from any Wiktionary edition",
"type": "object",
"required": [
"word",
"pos",
"senses"
],
"properties": {
"word": {
"type": "string",
"description": "The headword being defined"
},
"pos": {
"type": "string",
"description": "Part of speech (noun, verb, adj, adv, etc.)",
"examples": [
"noun",
"verb",
"adj",
"adv",
"prep",
"conj",
"intj",
"pron"
]
},
"senses": {
"type": "array",
"description": "Word meanings and usage",
"items": {
"type": "object",
"properties": {
"glosses": {
"type": "array",
"items": {
"type": "string"
},
"description": "Definition text(s)"
},
"examples": {
"type": "array",
"items": {
"type": "string"
},
"description": "Usage examples"
},
"raw_glosses": {
"type": "array",
"items": {
"type": "string"
},
"description": "Unprocessed glosses with markup"
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Sense-specific tags (figurative, colloquial, etc.)"
}
}
}
},
"phonetics": {
"type": "object",
"description": "Pronunciation and sound information",
"properties": {
"ipa": {
"type": "array",
"items": {
"type": "string"
},
"description": "Clean IPA transcription(s) without special characters"
},
"ipa_variations": {
"type": "array",
"description": "Detailed IPA variations with regional information",
"items": {
"type": "object",
"properties": {
"ipa": {
"type": "string",
"description": "Clean IPA transcription"
},
"raw_tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Regional information (countries, regions, cities)"
}
},
"required": ["ipa"]
}
},
"homophones": {
"type": "array",
"items": {
"type": "string"
},
"description": "Words pronounced the same way"
}
}
},
"hyphenation": {
"type": "array",
"items": {
"type": "string"
},
"description": "Syllable breaks (e.g., ['Wör', 'ter', 'buch'])"
},
"forms": {
  "description": "Inflected forms. Can be a flat list (universal default for nouns, adj, etc.), a single compressed object (for verbs), or an array of compressed objects (for verbs with multiple usages like reflexive/transitive). Validated with anyOf because the flat-list branch is permissive enough that an empty array or an array of compressed objects also satisfies it.",
  "anyOf": [
    {
      "type": "array",
      "description": "Default: A flat, uncompressed list of all inflected forms.",
      "items": {
        "type": "object",
        "properties": {
          "form": {
            "type": "string"
          },
          "tags": {
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "source": {
            "type": "string"
          }
        }
      }
    },
    {
      "type": "object",
      "description": "Compressed: A type-tagged, language-specific set of principal parts.",
      "properties": {
        "type": {
          "type": "string",
          "description": "Identifier for the compression rules (e.g., 'de_verb', 'fr_noun')."
        },
        "data": {
          "type": "object",
          "description": "The compressed principal parts.",
          "additionalProperties": true
        }
      },
      "required": [
        "type",
        "data"
      ]
    },
    {
      "type": "array",
      "description": "Multiple compressed forms (e.g., for verbs that can be both reflexive and transitive).",
      "items": {
        "type": "object",
        "properties": {
          "type": {
            "type": "string",
            "description": "Identifier for the compression rules (e.g., 'de_verb')."
          },
          "data": {
            "type": "object",
            "description": "The compressed principal parts.",
            "additionalProperties": true
          }
        },
        "required": [
          "type",
          "data"
        ]
      }
    }
  ]
},
"grammatical_features": {
"type": "object",
"description": "Gender, number, case, tense, etc.",
"properties": {
"gender": {
"type": "string",
"enum": [
"masculine",
"feminine",
"neuter",
"common"
]
},
"number": {
"type": "string",
"enum": [
"singular",
"plural",
"dual"
]
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Other grammatical tags"
}
}
},
"etymology": {
"type": "object",
"description": "Word origin and historical development",
"properties": {
"text": {
"type": "string"
},
"texts": {
"type": "array",
"items": {
"type": "string"
}
},
"number": {
"type": "integer"
}
}
},
"relations": {
"type": "object",
"description": "Semantic and lexical relationships",
"properties": {
"synonyms": {
"type": "array",
"items": {
"type": "object",
"properties": {
"word": {
"type": "string"
},
"sense": {
"type": "string"
}
}
}
},
"antonyms": {
"type": "array",
"items": {
"type": "object",
"properties": {
"word": {
"type": "string"
},
"sense": {
"type": "string"
}
}
}
},
"hypernyms": {
"type": "array",
"items": {
"type": "string"
},
"description": "Broader/parent terms"
},
"hyponyms": {
"type": "array",
"items": {
"type": "string"
},
"description": "Narrower/child terms"
},
"meronyms": {
"type": "array",
"items": {
"type": "string"
},
"description": "Part-of relationships"
},
"holonyms": {
"type": "array",
"items": {
"type": "string"
},
"description": "Whole-of relationships"
},
"related": {
"type": "array",
"items": {
"type": "string"
},
"description": "Related terms (see also)"
},
"derived": {
"type": "array",
"items": {
"type": "string"
},
"description": "Derived/compound terms"
},
"coordinate_terms": {
"type": "array",
"items": {
"type": "string"
},
"description": "Co-hyponyms (sister terms)"
}
}
},
"translations": {
"type": "array",
"description": "Translations to other languages",
"items": {
"type": "object",
"properties": {
"lang_code": {
"type": "string"
},
"word": {
"type": "string"
},
"sense_index": {
"type": "string"
},
"tags": {
"type": "array",
"items": {
"type": "string"
}
}
}
}
},
"descendants": {
"type": "array",
"description": "Words in other languages derived from this word",
"items": {
"type": "object",
"properties": {
"lang_code": {
"type": "string"
},
"lang": {
"type": "string"
},
"word": {
"type": "string"
},
"tags": {
"type": "array",
"items": {
"type": "string"
}
}
}
}
}
}
}