""" OpenAI-compatible LLM client for VocabListGenerator Adapted from ResourceTranslate/llm_client.py """ import json import openai from typing import List, Tuple from models import VocabRequest, CEFR_DESCRIPTIONS from config import Config class LLMClient: """OpenAI-compatible LLM client (LM Studio / DeepSeek)""" def __init__(self, config: Config): self.config = config self.client = openai.OpenAI( base_url=config.llm_config['base_url'], api_key=config.llm_config.get('api_key', 'not-needed') ) self.model = config.llm_config['model'] self.timeout = config.llm_config.get('timeout', 30) self.max_retries = config.llm_config.get('max_retries', 3) def generate_vocabulary(self, request: VocabRequest) -> List[Tuple[str, str]]: """ Ask the LLM to generate vocabulary word pairs for the given request. Returns a list of (word_in_lang_first, word_in_lang_second) tuples. """ prompt = self._build_vocab_prompt(request) system_message = ( "You are an expert language teacher and vocabulary specialist. " "Generate accurate, natural vocabulary word pairs exactly as instructed. " "Your response must be a valid JSON array and nothing else." ) for attempt in range(self.max_retries): try: response = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": system_message}, {"role": "user", "content": prompt} ], temperature=0.7, timeout=self.timeout ) if not response or not response.choices: print(f" [attempt {attempt + 1}] Empty response from LLM") continue choice = response.choices[0] if not hasattr(choice, 'message') or not choice.message: print(f" [attempt {attempt + 1}] Invalid response structure") continue content = choice.message.content if content is None: print(f" [attempt {attempt + 1}] Empty content in response") continue pairs = self._parse_vocab_response(content) if pairs: return pairs else: print(f" [attempt {attempt + 1}] Could not parse valid pairs from response") except Exception as e: print(f" [attempt {attempt + 1}] Failed: {e}") if attempt == self.max_retries - 1: print("All attempts exhausted.") return [] # ------------------------------------------------------------------ # Private helpers # ------------------------------------------------------------------ def _build_vocab_prompt(self, request: VocabRequest) -> str: """Build the vocabulary generation prompt.""" level_desc = CEFR_DESCRIPTIONS.get(request.level, "") lines = [ f"Generate exactly {request.amount} vocabulary word pairs for the topic: \"{request.category}\".", "", f"Proficiency level: {request.level}" + (f" — {level_desc}" if level_desc else ""), "", f"First language (wordFirst): {request.lang_first_name}", f"Second language (wordSecond): {request.lang_second_name}", ] if request.instructions and request.instructions.strip(): lines += [ "", "Additional instructions (follow these carefully):", request.instructions.strip(), ] lines += [ "", "Rules:", "- Choose vocabulary appropriate for the specified proficiency level.", "- Return ONLY a JSON array. 
            "- Each element is a 2-item array: [word_in_first_language, word_in_second_language].",
            f"- The array must contain exactly {request.amount} unique pairs.",
            "- NO DUPLICATES: Each word in the first language must appear only once.",
            "- VARY the vocabulary: avoid repeating similar words.",
            "",
            "Example format:",
            '[',
            '  ["Krankenhaus", "hospital"],',
            '  ["Arzt", "doctor"]',
            ']',
        ]

        return "\n".join(lines)

    def _parse_vocab_response(self, response: str) -> List[Tuple[str, str]]:
        """Parse the LLM response into a list of word-pair tuples."""
        if not response or not response.strip():
            return []

        try:
            text = response.strip()

            # Try direct parse first
            if text.startswith('['):
                data = json.loads(text)
            else:
                # Extract JSON array from surrounding text
                start = text.find('[')
                end = text.rfind(']') + 1
                if start == -1 or end == 0:
                    print(f"Could not locate JSON array in response:\n{response[:500]}")
                    return []
                data = json.loads(text[start:end])

            # Collect pairs, keeping only the first occurrence of each
            # first-language word and recording duplicates for logging.
            seen_first = {}
            duplicates = []
            pairs: List[Tuple[str, str]] = []

            for item in data:
                if isinstance(item, (list, tuple)) and len(item) >= 2:
                    word_first = str(item[0]).strip()
                    word_second = str(item[1]).strip()

                    if word_first in seen_first:
                        duplicates.append(word_first)
                    else:
                        seen_first[word_first] = word_second
                        pairs.append((word_first, word_second))

            # Log duplicates if found
            if duplicates:
                unique_dups = list(set(duplicates))
                print(f" ⚠ Warning: Found {len(duplicates)} duplicate first-language words: {unique_dups[:5]}{'...' if len(unique_dups) > 5 else ''}")
                print(f" → Using {len(pairs)} unique pairs after deduplication")

            return pairs

        except json.JSONDecodeError as e:
            print(f"JSON parse error: {e}")
            print(f"Raw response (first 800 chars):\n{response[:800]}")
            print(f"Raw response (last 200 chars):\n{response[-200:]}")
            return []
        except Exception as e:
            print(f"Unexpected error parsing response: {e}")
            print(f"Response preview:\n{response[:500]}")
            return []
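

# ----------------------------------------------------------------------
# Illustrative usage (sketch only; not part of the original module).
# It assumes Config can be constructed without arguments and that
# VocabRequest accepts keyword arguments matching the attribute names
# used above (amount, category, level, lang_first_name, lang_second_name,
# instructions). Adjust to the real constructors in config.py / models.py.
# ----------------------------------------------------------------------
if __name__ == "__main__":
    config = Config()          # assumption: no-argument constructor
    client = LLMClient(config)

    request = VocabRequest(    # assumption: keyword-argument constructor
        amount=10,
        category="Health",
        level="B1",
        lang_first_name="German",
        lang_second_name="English",
        instructions="",
    )

    # generate_vocabulary returns [] when parsing fails on every retry.
    pairs = client.generate_vocabulary(request)
    for word_first, word_second in pairs:
        print(f"{word_first} -> {word_second}")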