welcome gitea
This commit is contained in:
90
language_instructions.yaml
Normal file
90
language_instructions.yaml
Normal file
@@ -0,0 +1,90 @@
|
||||
# Language-Specific Instructions
|
||||
# ==============================
|
||||
# This file contains language-specific instructions that are automatically
|
||||
# applied when generating vocabulary lists. The instructions are merged with
|
||||
# any batch-specific instructions.
|
||||
#
|
||||
# Supported languages (by ID):
|
||||
# 1=English 2=Mandarin 3=Spanish 4=Hindi 5=Arabic
|
||||
# 7=Portuguese 8=Russian 13=Korean 14=French 15=German
|
||||
# 19=Indonesian 20=Italian 21=Japanese 24=Polish 39=Romanian
|
||||
|
||||
# Non-Latin script languages require Latin transcription
|
||||
# These instructions are added automatically for relevant languages
|
||||
|
||||
# Japanese (ID: 21)
|
||||
21:
|
||||
name: "Japanese"
|
||||
transcription: "Include hiragana and romaji in parentheses ONLY for pronunciation. Do NOT add grammatical metadata like formal/informal. Example: 食べる (たべる, taberu)"
|
||||
|
||||
# Chinese/Mandarin (ID: 2)
|
||||
2:
|
||||
name: "Chinese (Mandarin)"
|
||||
transcription: "Include pinyin with tone marks in parentheses ONLY for pronunciation. Do NOT add grammatical annotations. Example: 吃 (chī)"
|
||||
|
||||
# Korean (ID: 13)
|
||||
13:
|
||||
name: "Korean"
|
||||
transcription: "Include romanization in parentheses ONLY for pronunciation. Do NOT add grammatical metadata. Example: 먹다 (meokda)"
|
||||
|
||||
# Russian (ID: 8) - Cyrillic script
|
||||
8:
|
||||
name: "Russian"
|
||||
transcription: "Include transliteration in Latin script in parentheses ONLY for pronunciation. Do NOT add gender or case information."
|
||||
|
||||
# Arabic (ID: 5) - Arabic script
|
||||
5:
|
||||
name: "Arabic"
|
||||
transcription: "Include transliteration in Latin script in parentheses ONLY for pronunciation. Do NOT add gender or root information."
|
||||
|
||||
# Hindi (ID: 4) - Devanagari script
|
||||
4:
|
||||
name: "Hindi"
|
||||
transcription: "Include transliteration in Latin script in parentheses ONLY for pronunciation. Do NOT add gender or grammatical information."
|
||||
|
||||
# Polish (ID: 24) - Latin script with diacritics
|
||||
24:
|
||||
name: "Polish"
|
||||
special: "Use proper Polish diacritics (ą, ć, ę, ł, ń, ó, ś, ź, ż)."
|
||||
|
||||
# Portuguese (ID: 7)
|
||||
7:
|
||||
name: "Portuguese"
|
||||
variant: "Brazilian Portuguese"
|
||||
special: "Provide plain vocabulary words only. Do NOT add metadata in parentheses like (formal), (informal), (male), (female). Use Brazilian Portuguese."
|
||||
|
||||
# Indonesian (ID: 19) - Latin script
|
||||
19:
|
||||
name: "Indonesian"
|
||||
special: "Use standard Indonesian spelling."
|
||||
|
||||
# Romanian (ID: 39) - Latin script
|
||||
39:
|
||||
name: "Romanian"
|
||||
special: "Use proper Romanian diacritics (ă, â, î, ș, ț)."
|
||||
|
||||
# Latin script languages - avoid metadata in parentheses
|
||||
# English (ID: 1)
|
||||
1:
|
||||
name: "English"
|
||||
special: "Provide plain vocabulary words only. Do NOT add metadata in parentheses like (formal), (informal), (male), (female), (slang), etc."
|
||||
|
||||
# Spanish (ID: 3)
|
||||
3:
|
||||
name: "Spanish"
|
||||
special: "Provide plain vocabulary words only. Do NOT add metadata in parentheses like (formal), (informal), (male), (female), (slang), etc. Include accents (á, é, í, ó, ú, ü, ñ)."
|
||||
|
||||
# French (ID: 14)
|
||||
14:
|
||||
name: "French"
|
||||
special: "Provide plain vocabulary words only. Do NOT add metadata in parentheses like (formal), (informal), (male), (female). Include accents (é, è, ê, ë, à, â, î, ï, ô, ù, û, ü, ç)."
|
||||
|
||||
# German (ID: 15)
|
||||
15:
|
||||
name: "German"
|
||||
special: "Provide plain vocabulary words only. Do NOT add metadata in parentheses like (formal), (informal), (male), (female). Include umlauts and eszett (ä, ö, ü, ß)."
|
||||
|
||||
# Italian (ID: 20)
|
||||
20:
|
||||
name: "Italian"
|
||||
special: "Provide plain vocabulary words only. Do NOT add metadata in parentheses like (formale), (informale), (maschile), (femminile). Include accents (à, è, é, ì, ò, ù)."
|
||||
Reference in New Issue
Block a user