# BatchVocabListGenerator/models.py

"""
Data models for VocabListGenerator
"""

from dataclasses import dataclass


# Short description of each CEFR proficiency level, keyed by level code and
# listed from lowest (A1) to highest (C2).
CEFR_DESCRIPTIONS = {
    "A1": "Beginner — absolute basics, the 100-200 most common words, simple concrete concepts",
    "A2": "Elementary — basic everyday vocabulary, simple familiar topics",
    "B1": "Intermediate — practical vocabulary for familiar topics, travel, work",
    "B2": "Upper-Intermediate — broader vocabulary including abstract and technical topics",
    "C1": "Advanced — precise vocabulary, idiomatic expressions, nuanced meaning",
    "C2": "Proficient — near-native vocabulary, highly specialised or literary terms",
}

# Valid CEFR level codes, in ascending order. Taken from the mapping's keys
# (dicts preserve insertion order) so the two constants always agree.
CEFR_LEVELS = tuple(CEFR_DESCRIPTIONS)


@dataclass
class VocabRequest:
    """Parameters describing one vocabulary list generation request.

    Attributes:
        amount: Number of word pairs to generate.
        lang_first_id: Polly language ID for the first language.
        lang_second_id: Polly language ID for the second language.
        lang_first_name: Human-readable name of the first language
            (sent to the LLM).
        lang_second_name: Human-readable name of the second language
            (sent to the LLM).
        category: Topic / category of the vocabulary list.
        instructions: Any additional instructions for the LLM.
        level: CEFR proficiency level (A1, A2, B1, B2, C1 or C2).
            Defaults to "A2". The value is stored as given; this class
            performs no validation.
    """

    # Size of the list
    amount: int

    # Language pair: numeric IDs plus display names
    lang_first_id: int
    lang_second_id: int
    lang_first_name: str
    lang_second_name: str

    # Content steering
    category: str
    instructions: str
    level: str = "A2"