"""
OpenAI-compatible LLM client for LM Studio integration
"""

import json
import re
from typing import List

import openai

from config import Config
from models import TranslationBatch


class LLMClient:
    """OpenAI-compatible chat client used to translate batches of strings.

    Wraps an ``openai.OpenAI`` client pointed at the ``base_url`` found in
    ``config.llm_config`` and exposes :meth:`translate_batch`, which sends one
    chat-completion request per batch and parses the reply into a list of
    translated strings.
    """

    # Leading list numbering such as "1. " or "12) " that a model may echo
    # back when it ignores the requested JSON format.
    _NUMBERING_PREFIX = re.compile(r'^\d+[.)]\s+')

    def __init__(self, config: Config):
        """Build the underlying OpenAI client from ``config.llm_config``.

        Required keys: ``base_url`` and ``model``.
        Optional keys: ``api_key`` (default ``'not-needed'``), ``timeout``
        (default ``30``) and ``max_retries`` (default ``3``).

        Raises:
            KeyError: If ``base_url`` or ``model`` is missing.
        """
        self.config = config
        self.client = openai.OpenAI(
            base_url=config.llm_config['base_url'],
            api_key=config.llm_config.get('api_key', 'not-needed'),
        )
        self.model = config.llm_config['model']
        self.timeout = config.llm_config.get('timeout', 30)
        self.max_retries = config.llm_config.get('max_retries', 3)

    def translate_batch(self, batch: TranslationBatch, instruction: str) -> List[str]:
        """Translate a batch of strings.

        Makes up to ``self.max_retries`` attempts. An attempt counts as failed
        when the request raises, the reply carries no content, or the content
        cannot be parsed into at least one translation.

        Args:
            batch: Batch whose ``items`` and ``target_language`` are rendered
                into the prompt.
            instruction: Free-form translation instructions for the model.

        Returns:
            The parsed translations in the order the model returned them, or
            an empty list when the batch is empty or every attempt failed.
            The length is not checked against ``batch.items``.
        """
        # Nothing to translate: skip the round trip to the model entirely.
        if not batch.items:
            return []

        system_message = (
            "You are a professional translator. "
            "You must follow the translation instructions EXACTLY as provided. "
            "Pay special attention to formality level, tone, and any specific requirements mentioned. "
            "Your response must be a valid JSON array."
        )
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": self._build_translation_prompt(batch, instruction)},
        ]

        for attempt in range(self.max_retries):
            # Deliberately broad: this loop is the boundary that turns any
            # client or transport failure into a retry instead of a crash.
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    temperature=0.3,
                    timeout=self.timeout,
                )
                content = self._extract_content(response)
                if content is None:
                    continue

                translations = self._parse_translation_response(content)
                if translations:
                    return translations
                # An unparseable reply is worth another attempt rather than
                # giving up while retries remain.
                print(f"Translation attempt {attempt + 1} returned no usable translations")
            except Exception as e:
                print(f"Translation attempt {attempt + 1} failed: {e}")

        print("All translation attempts failed for batch")
        return []

    @staticmethod
    def _extract_content(response):
        """Return the first choice's message content, or ``None`` if absent."""
        if not response or not response.choices:
            print("Empty response from LLM")
            return None

        message = getattr(response.choices[0], 'message', None)
        if not message:
            print(f"Invalid response structure: {response}")
            return None

        if message.content is None:
            print(f"Empty content in response: {response}")
            return None
        return message.content

    def _build_translation_prompt(self, batch: TranslationBatch, instruction: str) -> str:
        """Build the user prompt for ``batch``.

        The prompt states the instructions, lists every item as a 1-based
        numbered line (``string-array`` items are tagged ``[ARRAY]`` with
        their name), asks for a JSON array reply, and repeats the
        instructions at the end.
        """
        parts = [
            "=== TRANSLATION TASK ===\n\n",
            f"CRITICAL INSTRUCTIONS (you MUST follow these):\n{instruction}\n\n",
            "IMPORTANT: Pay special attention to the tone and formality level specified above. ",
            "This is crucial for the quality of the translation.\n\n",
        ]

        # Only explain the array convention when the batch actually uses it.
        if any(item.item_type == 'string-array' for item in batch.items):
            parts.append("Note: Some items are string arrays (multiple items separated by ' | '). ")
            parts.append("For string arrays, keep the same number of items and use ' | ' as separator.\n\n")

        parts.append("=== CONTENT TO TRANSLATE ===\n")
        parts.append(f"Target language: {batch.target_language}\n\n")

        for number, item in enumerate(batch.items, start=1):
            if item.item_type == 'string-array':
                parts.append(f"{number}. [ARRAY] {item.name}: {item.value}\n")
            else:
                parts.append(f"{number}. {item.value}\n")

        parts.append("\n=== RESPONSE FORMAT ===\n")
        parts.append("Return ONLY a JSON array with the translations in the same order:\n")
        parts.append('["translation1", "translation2", ...]\n\n')
        parts.append("=== REMINDER ===\n")
        parts.append(f"Remember to follow these instructions: {instruction}\n")

        return "".join(parts)

    def _parse_translation_response(self, response: str) -> List[str]:
        """Parse the model's reply into a list of translated strings.

        Strategy:
          1. If the reply contains a ``[`` followed later by a ``]``, decode
             the JSON array that starts at the first ``[``. Text before it
             (preamble, code fence) and after it (notes, closing fence) is
             ignored.
          2. Otherwise treat each non-empty line as one translation, removing
             leading numbering, surrounding quotes and code-fence lines.

        Returns:
            The translations, or an empty list when the reply is blank or the
            bracketed span is not valid JSON.
        """
        if not response or not response.strip():
            print(f"Empty response to parse: {response}")
            return []

        text = response.strip()
        start = text.find('[')
        end = text.rfind(']')

        if start != -1 and end > start:
            try:
                # raw_decode stops at the end of the first JSON value, so
                # trailing commentary after the array does not break parsing.
                parsed, _ = json.JSONDecoder().raw_decode(text[start:])
            except (ValueError, RecursionError) as e:
                print(f"Error parsing translation response: {e}")
                print(f"Raw response: {response}")
                return []
            return [self._coerce_to_text(entry) for entry in parsed]

        # Fallback: no bracketed span, so read the reply as one item per line.
        translations = []
        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line or line.startswith(('[', ']', '```')):
                continue
            # Strip only genuine numbering; a sentence containing ". " must
            # survive intact.
            line = self._NUMBERING_PREFIX.sub('', line, count=1)
            # Drop the separator comma of a quoted, JSON-like line.
            if line.endswith(('",', "',")):
                line = line[:-1]
            line = line.strip('"\'')
            if line:
                translations.append(line)
        return translations

    @staticmethod
    def _coerce_to_text(value: object) -> str:
        """Convert one decoded JSON element to the string callers expect.

        Strings pass through, ``null`` becomes ``''`` so positions stay
        aligned, nested arrays are joined with the same ``' | '`` separator
        the prompt prescribes for string arrays, and any other value is
        rendered as JSON text.
        """
        if isinstance(value, str):
            return value
        if value is None:
            return ''
        if isinstance(value, list):
            return ' | '.join(LLMClient._coerce_to_text(entry) for entry in value)
        return json.dumps(value, ensure_ascii=False)