llm_client.py (Normal file, 176 lines)
@@ -0,0 +1,176 @@
"""
OpenAI-compatible LLM client for VocabListGenerator
Adapted from ResourceTranslate/llm_client.py
"""

import json
import openai
from typing import List, Tuple

from models import VocabRequest, CEFR_DESCRIPTIONS
from config import Config


class LLMClient:
    """OpenAI-compatible LLM client (LM Studio / DeepSeek)"""

    def __init__(self, config: Config):
        self.config = config
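        # Any OpenAI-compatible endpoint works here; local servers such as
        # LM Studio ignore the API key, hence the 'not-needed' placeholder.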
        self.client = openai.OpenAI(
            base_url=config.llm_config['base_url'],
            api_key=config.llm_config.get('api_key', 'not-needed')
        )
        self.model = config.llm_config['model']
        self.timeout = config.llm_config.get('timeout', 30)
        self.max_retries = config.llm_config.get('max_retries', 3)

    def generate_vocabulary(self, request: VocabRequest) -> List[Tuple[str, str]]:
        """
        Ask the LLM to generate vocabulary word pairs for the given request.
        Returns a list of (word_in_lang_first, word_in_lang_second) tuples.
        """
        prompt = self._build_vocab_prompt(request)

        system_message = (
            "You are an expert language teacher and vocabulary specialist. "
            "Generate accurate, natural vocabulary word pairs exactly as instructed. "
            "Your response must be a valid JSON array and nothing else."
        )

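        # Retry the request up to max_retries times; if every attempt fails,
        # fall through and return an empty list.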
        for attempt in range(self.max_retries):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": system_message},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=0.7,
                    timeout=self.timeout
                )

                if not response or not response.choices:
                    print(f" [attempt {attempt + 1}] Empty response from LLM")
                    continue

                choice = response.choices[0]
                if not hasattr(choice, 'message') or not choice.message:
                    print(f" [attempt {attempt + 1}] Invalid response structure")
                    continue

                content = choice.message.content
                if content is None:
                    print(f" [attempt {attempt + 1}] Empty content in response")
                    continue

                pairs = self._parse_vocab_response(content)
                if pairs:
                    return pairs
                else:
                    print(f" [attempt {attempt + 1}] Could not parse valid pairs from response")

            except Exception as e:
                print(f" [attempt {attempt + 1}] Failed: {e}")
                if attempt == self.max_retries - 1:
                    print("All attempts exhausted.")

        return []

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _build_vocab_prompt(self, request: VocabRequest) -> str:
        """Build the vocabulary generation prompt."""
        level_desc = CEFR_DESCRIPTIONS.get(request.level, "")
        lines = [
            f"Generate exactly {request.amount} vocabulary word pairs for the topic: \"{request.category}\".",
            "",
            f"Proficiency level: {request.level}" + (f" — {level_desc}" if level_desc else ""),
            "",
            f"First language (wordFirst): {request.lang_first_name}",
            f"Second language (wordSecond): {request.lang_second_name}",
        ]

        if request.instructions and request.instructions.strip():
            lines += [
                "",
                "Additional instructions (follow these carefully):",
                request.instructions.strip(),
            ]

        lines += [
            "",
            "Rules:",
            "- Choose vocabulary appropriate for the specified proficiency level.",
            "- Return ONLY a JSON array. No markdown, no explanation, no extra text.",
            "- Each element is a 2-item array: [word_in_first_language, word_in_second_language].",
            f"- The array must contain exactly {request.amount} unique pairs.",
            "- NO DUPLICATES: Each word in the first language must appear only once.",
            "- VARY the vocabulary: avoid repeating similar words.",
            "",
            "Example format:",
            '[',
            ' ["Krankenhaus", "hospital"],',
            ' ["Arzt", "médico"]',
            ']',
        ]

        return "\n".join(lines)

    def _parse_vocab_response(self, response: str) -> List[Tuple[str, str]]:
        """Parse the LLM response into a list of word-pair tuples."""
        if not response or not response.strip():
            return []

        try:
            text = response.strip()

            # Try direct parse first
            if text.startswith('['):
                data = json.loads(text)
            else:
                # Extract JSON array from surrounding text
                start = text.find('[')
                end = text.rfind(']') + 1
                if start == -1 or end == 0:
                    print(f"Could not locate JSON array in response:\n{response[:500]}")
                    return []
                data = json.loads(text[start:end])

            # Check for duplicates and log warning
            seen_first = {}
            duplicates = []
            pairs: List[Tuple[str, str]] = []
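            # Keep only well-formed two-item entries; coerce both sides to stripped strings.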
            for item in data:
                if isinstance(item, (list, tuple)) and len(item) >= 2:
                    word_first = str(item[0]).strip()
                    word_second = str(item[1]).strip()

                    # Track duplicates
                    if word_first in seen_first:
                        duplicates.append(word_first)
                    else:
                        seen_first[word_first] = word_second

                    pairs.append((word_first, word_second))

            # Log duplicates if found
            if duplicates:
                unique_dups = list(set(duplicates))
                print(f" ⚠ Warning: Found {len(duplicates)} duplicate first-language words: {unique_dups[:5]}{'...' if len(unique_dups) > 5 else ''}")
                # Deduplicate - keep first occurrence only
                pairs = list(seen_first.items())
                print(f" → Using {len(pairs)} unique pairs after deduplication")

            return pairs

        except json.JSONDecodeError as e:
            print(f"JSON parse error: {e}")
            print(f"Raw response (first 800 chars):\n{response[:800]}")
            print(f"Raw response (last 200 chars):\n{response[-200:]}")
            return []
        except Exception as e:
            print(f"Unexpected error parsing response: {e}")
            print(f"Response preview:\n{response[:500]}")
            return []
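
For reference, a minimal usage sketch of the new LLMClient. The VocabRequest field names below (category, amount, level, lang_first_name, lang_second_name, instructions) are inferred from the attributes read in _build_vocab_prompt; the actual constructor signatures of Config and VocabRequest live in config.py and models.py and may differ.

# Illustrative usage sketch; constructor signatures are assumptions based on
# the attributes this client reads from Config and VocabRequest.
from config import Config
from models import VocabRequest
from llm_client import LLMClient

config = Config()  # assumed to expose llm_config with base_url, model, etc.
client = LLMClient(config)

request = VocabRequest(
    category="At the hospital",
    amount=10,
    level="B1",
    lang_first_name="German",
    lang_second_name="English",
    instructions="",
)

pairs = client.generate_vocabulary(request)  # [] if all retries fail
for word_first, word_second in pairs:
    print(f"{word_first} -> {word_second}")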