Files
BatchVocabListGenerator/language_instructions.yaml
jonasgaudian eabe2e2969 welcome gitea
2026-02-19 17:18:23 +01:00

91 lines
3.4 KiB
YAML

# Language-Specific Instructions
# ==============================
# This file contains language-specific instructions that are automatically
# applied when generating vocabulary lists. The instructions are merged with
# any batch-specific instructions.
#
# Supported languages (by ID):
# 1=English 2=Mandarin 3=Spanish 4=Hindi 5=Arabic
# 7=Portuguese 8=Russian 13=Korean 14=French 15=German
# 19=Indonesian 20=Italian 21=Japanese 24=Polish 39=Romanian
# Non-Latin script languages require Latin transcription
# These instructions are added automatically for relevant languages
# Japanese (ID: 21)
# `name` is the display name; `transcription` is the instruction text asking
# for the reading in parentheses. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
21:
  name: "Japanese"
  transcription: >-
    Include hiragana and romaji in parentheses ONLY for pronunciation.
    Do NOT add grammatical metadata like formal/informal.
    Example: 食べる (たべる, taberu)
# Chinese/Mandarin (ID: 2)
# `name` is the display name; `transcription` is the instruction text asking
# for pinyin in parentheses. Both keys must be nested under the ID key.
# The example is toneless so that it obeys the "Do NOT add tone marks" rule in
# the same instruction; a tone-marked example contradicts that rule.
# NOTE(review): if tone-marked pinyin is actually wanted, remove the tone-mark
# prohibition instead of changing the example - confirm with the consumer.
# `>-` folds the lines below into one single-line string (no trailing newline).
2:
  name: "Chinese (Mandarin)"
  transcription: >-
    Include pinyin in parentheses ONLY for pronunciation.
    Do NOT add tone marks or grammatical annotations.
    Example: 吃 (chi)
# Korean (ID: 13)
# `name` is the display name; `transcription` is the instruction text asking
# for romanization in parentheses. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
13:
  name: "Korean"
  transcription: >-
    Include romanization in parentheses ONLY for pronunciation.
    Do NOT add grammatical metadata.
    Example: 먹다 (meokda)
# Russian (ID: 8) - Cyrillic script
# `name` is the display name; `transcription` is the instruction text asking
# for a Latin transliteration. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
8:
  name: "Russian"
  transcription: >-
    Include transliteration in Latin script in parentheses ONLY for
    pronunciation. Do NOT add gender or case information.
# Arabic (ID: 5) - Arabic script
# `name` is the display name; `transcription` is the instruction text asking
# for a Latin transliteration. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
5:
  name: "Arabic"
  transcription: >-
    Include transliteration in Latin script in parentheses ONLY for
    pronunciation. Do NOT add gender or root information.
# Hindi (ID: 4) - Devanagari script
# `name` is the display name; `transcription` is the instruction text asking
# for a Latin transliteration. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
4:
  name: "Hindi"
  transcription: >-
    Include transliteration in Latin script in parentheses ONLY for
    pronunciation. Do NOT add gender or grammatical information.
# Polish (ID: 24) - Latin script but special characters
# `name` is the display name; `special` is the extra instruction text for this
# language. Both keys must be nested under the ID key.
24:
  name: "Polish"
  special: "Use proper Polish diacritics (ą, ć, ę, ł, ń, ó, ś, ź, ż)."
# Portuguese (ID: 7)
# `name` is the display name, `variant` names the regional variety, and
# `special` is the extra instruction text. All keys must be nested under the
# ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
7:
  name: "Portuguese"
  variant: "Brazilian Portuguese"
  special: >-
    Provide plain vocabulary words only.
    Do NOT add metadata in parentheses like (formal), (informal), (male),
    (female). Use Brazilian Portuguese.
# Indonesian (ID: 19) - Latin script
# `name` is the display name; `special` is the extra instruction text for this
# language. Both keys must be nested under the ID key.
19:
  name: "Indonesian"
  special: "Use standard Indonesian spelling."
# Romanian (ID: 39) - Latin script
# `name` is the display name; `special` is the extra instruction text for this
# language. Both keys must be nested under the ID key.
39:
  name: "Romanian"
  special: "Use proper Romanian diacritics (ă, â, î, ș, ț)."
# Latin script languages - avoid metadata in parentheses
# English (ID: 1)
# `name` is the display name; `special` is the extra instruction text for this
# language. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
1:
  name: "English"
  special: >-
    Provide plain vocabulary words only.
    Do NOT add metadata in parentheses like (formal), (informal), (male),
    (female), (slang), etc.
# Spanish (ID: 3)
# `name` is the display name; `special` is the extra instruction text for this
# language. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
3:
  name: "Spanish"
  special: >-
    Provide plain vocabulary words only.
    Do NOT add metadata in parentheses like (formal), (informal), (male),
    (female), (slang), etc. Include accents (á, é, í, ó, ú, ü, ñ).
# French (ID: 14)
# `name` is the display name; `special` is the extra instruction text for this
# language. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
14:
  name: "French"
  special: >-
    Provide plain vocabulary words only.
    Do NOT add metadata in parentheses like (formal), (informal), (male),
    (female). Include accents (é, è, ê, ë, à, â, ç).
# German (ID: 15)
# `name` is the display name; `special` is the extra instruction text for this
# language. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
15:
  name: "German"
  special: >-
    Provide plain vocabulary words only.
    Do NOT add metadata in parentheses like (formal), (informal), (male),
    (female). Include umlauts (ä, ö, ü, ß).
# Italian (ID: 20)
# `name` is the display name; `special` is the extra instruction text for this
# language. Both keys must be nested under the ID key.
# `>-` folds the lines below into one single-line string (no trailing newline).
20:
  name: "Italian"
  special: >-
    Provide plain vocabulary words only.
    Do NOT add metadata in parentheses like (formale), (informale),
    (maschile), (femminile). Include accents (à, è, é, ì, ò, ù).